diff --git a/.hailort.jpg b/.hailort.jpg
index 84d29889..cd625648 100644
Binary files a/.hailort.jpg and b/.hailort.jpg differ
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 55d54df7..7d02330b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,8 +1,12 @@
 cmake_minimum_required(VERSION 3.0.0)
 
 find_program(CCACHE_PROGRAM ccache)
+find_program(CLCACHE_PROGRAM clcache)
+
 if(CCACHE_PROGRAM)
     set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
+elseif(CLCACHE_PROGRAM)
+    set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CLCACHE_PROGRAM}")
 endif()
 
 project(HailoRT)
diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h
index d7fe8f23..d3ce3664 100644
--- a/common/include/context_switch_defs.h
+++ b/common/include/context_switch_defs.h
@@ -56,6 +56,13 @@ extern "C" {
 
 #define CONTEXT_SWITCH_DEFS__WRITE_ACTION_BY_TYPE_MAX_SIZE (4)
 
+// TODO HRT-12512: Update variable when/if DDR has its own CMA region
+#define CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS (0x80000000)
+#define CONTEXT_SWITCH_DEFS__END_M4_MAPPED_DDR_ADDRESS (0x90000000)
+#define CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS_AFTER_LUT (0x50000000)
+#define CONTEXT_SWITCH_DEFS__DDR_ADDRESS_MASK (0x0FFFFFFF)
+#define CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS (0)
+
 #pragma pack(push, 1)
 
 typedef struct {
@@ -207,7 +214,7 @@ typedef struct {
     uint32_t kernel_done_count;
 } CONTEXT_SWITCH_DEFS__enable_lcu_action_non_default_data_t;
 
-/* Default action - kernel_done_address and kernel_done_count has default values */
+/* Default action - kernel_done_address, kernel_done_count have default values */
 typedef struct {
     uint8_t packed_lcu_id;
     uint8_t network_index;
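A note on the new M4 DDR window macros above: the mask keeps the low 28 bits of an address, which matches the 256 MB window between the start and end mapped addresses. A minimal sketch of the translation this implies follows; the helper name and the exact rebasing semantics are assumptions, not part of the diff:

    /* Hypothetical helper, assuming the M4 sees DDR through a fixed 256 MB window. */
    static inline uint32_t m4_mapped_ddr_address(uint32_t ddr_address)
    {
        /* Keep the low 28 bits and rebase them onto the start of the M4 window. */
        return CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS |
            (ddr_address & CONTEXT_SWITCH_DEFS__DDR_ADDRESS_MASK);
    }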
diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h
index 9e022bdc..60ea8e31 100644
--- a/common/include/control_protocol.h
+++ b/common/include/control_protocol.h
@@ -81,6 +81,7 @@ extern "C" {
 /* Value to represent an operation should be performed on all streams. */
 #define CONTROL_PROTOCOL__ALL_DATAFLOW_MANAGERS (0xFF)
+#define CONTROL_PROTOCOL__MAX_CONTEXT_SIZE (3072)
 
 #define CONTROL_PROTOCOL__OPCODES_VARIABLES \
     CONTROL_PROTOCOL__OPCODE_X(HAILO_CONTROL_OPCODE_IDENTIFY, true, CPU_ID_APP_CPU)\
@@ -868,15 +869,18 @@ typedef struct {
 
 typedef struct {
     bool preliminary_run_asap;
+    bool batch_register_config;
+    bool can_fast_batch_switch;
 } CONTROL_PROTOCOL__INFER_FEATURE_LIST_t;
 
 typedef struct {
-    uint8_t dynamic_contexts_count;
+    uint16_t dynamic_contexts_count;
     CONTROL_PROTOCOL__INFER_FEATURE_LIST_t infer_features;
     CONTROL_PROTOCOL__VALIDATION_FEATURE_LIST_t validation_features;
     uint8_t networks_count;
     uint16_t csm_buffer_size;
     uint16_t batch_size[CONTROL_PROTOCOL__MAX_NETWORKS_PER_NETWORK_GROUP];
+    uint32_t external_action_list_address;
     uint32_t boundary_channels_bitmap[CONTROL_PROTOCOL__MAX_VDMA_ENGINES_COUNT];
 } CONTROL_PROTOCOL__application_header_t;
 
@@ -954,10 +958,10 @@ typedef struct {
 #pragma warning(disable: 4200)
 #endif
 typedef struct {
-    uint32_t is_first_control_per_context_length;
-    uint8_t is_first_control_per_context;
-    uint32_t is_last_control_per_context_length;
-    uint8_t is_last_control_per_context;
+    uint32_t is_first_chunk_per_context_length;
+    uint8_t is_first_chunk_per_context;
+    uint32_t is_last_chunk_per_context_length;
+    uint8_t is_last_chunk_per_context;
     uint32_t context_type_length;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_network_data_length;
@@ -988,7 +992,7 @@ typedef struct {
     uint32_t context_type_length;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_index_length;
-    uint8_t context_index;
+    uint16_t context_index;
     uint32_t action_list_offset_length;
     uint16_t action_list_offset;
 } CONTROL_PROTOCOL__download_context_action_list_request_t;
@@ -1160,7 +1164,7 @@ typedef struct {
     bool break_at_any_batch_index;
     uint16_t batch_index;
     bool break_at_any_context_index;
-    uint8_t context_index;
+    uint16_t context_index;
     bool break_at_any_action_index;
     uint16_t action_index;
 } CONTROL_PROTOCOL__context_switch_breakpoint_data_t;
@@ -1470,15 +1474,21 @@ typedef enum {
     CONTROL_PROTOCOL__CONTEXT_SWITCH_INDEX_COUNT,
 } CONTROL_PROTOCOL__context_switch_context_index_t;
 
-#define CONTROL_PROTOCOL__MAX_CONTEXTS_PER_NETWORK_GROUP (64)
+#define CONTROL_PROTOCOL__MAX_CONTEXTS_PER_NETWORK_GROUP (1024)
 
+// This struct is used for both ControlActionList and DDRActionList (to keep the FW flow as similar as possible).
+// The context_network_data array will never hold more data than CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE.
+// In the ControlActionList case this is verified when sending and receiving the control. We make the array larger here so it can
+// hold DDRActionList contexts without needing to copy or do more processing in FW.
+// In both cases this struct holds a chunk of the context - in ControlActionList it will be as much of the context as a
+// single control message can carry, and in DDRActionList it will be the whole context.
 typedef struct {
-    bool is_first_control_per_context;
-    bool is_last_control_per_context;
+    bool is_first_chunk_per_context;
+    bool is_last_chunk_per_context;
     uint8_t context_type; // CONTROL_PROTOCOL__context_switch_context_type_t
     uint32_t context_network_data_length;
-    uint8_t context_network_data[CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE];
-} CONTROL_PROTOCOL__context_switch_context_info_single_control_t;
+    uint8_t context_network_data[CONTROL_PROTOCOL__MAX_CONTEXT_SIZE];
+} CONTROL_PROTOCOL__context_switch_context_info_chunk_t;
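To make the chunking contract above concrete, here is a minimal sketch (not part of the diff) of how a sender could split one context into chunks. max_chunk_size would be CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE for a ControlActionList and up to CONTROL_PROTOCOL__MAX_CONTEXT_SIZE for a DDRActionList; the transport is passed in as a callback since it is not defined here:

    #include <string.h> /* memcpy */

    static void send_context_in_chunks(const uint8_t *context_data, uint32_t total_size, uint32_t max_chunk_size,
        void (*send_chunk)(const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *))
    {
        uint32_t offset = 0;
        while (offset < total_size) {
            CONTROL_PROTOCOL__context_switch_context_info_chunk_t chunk = {0};
            uint32_t chunk_size = total_size - offset;
            if (chunk_size > max_chunk_size) {
                chunk_size = max_chunk_size;
            }
            /* The flags mark the chunk's position within the context */
            chunk.is_first_chunk_per_context = (0 == offset);
            chunk.is_last_chunk_per_context = ((offset + chunk_size) == total_size);
            chunk.context_network_data_length = chunk_size;
            memcpy(chunk.context_network_data, context_data + offset, chunk_size);
            send_chunk(&chunk);
            offset += chunk_size;
        }
    }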
CASSERT(sizeof(CONTROL_PROTOCOL__context_switch_context_index_t)<=UINT8_MAX, control_protocol_h);
 CASSERT(sizeof(CONTROL_PROTOCOL__context_switch_context_type_t)<=UINT8_MAX, control_protocol_h);
diff --git a/common/include/d2h_events.h b/common/include/d2h_events.h
index 1e402d6c..5d26cd5e 100644
--- a/common/include/d2h_events.h
+++ b/common/include/d2h_events.h
@@ -123,14 +123,14 @@ typedef struct {
 
 #define D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT (1)
 
-/* D2H_EVENT_context_switch_breakpoint_reached_event_massage_t should be the same as
+/* D2H_EVENT_context_switch_breakpoint_reached_event_message_t should be the same as
  * CONTROL_PROTOCOL__context_switch_breakpoint_data_t and hailo_context_switch_breakpoint_reached_notification_message_t */
 typedef struct {
     uint8_t application_index;
     uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
     uint16_t action_index;
-} D2H_EVENT_context_switch_breakpoint_reached_event_massage_t;
+} D2H_EVENT_context_switch_breakpoint_reached_event_message_t;
 
 #define D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT (4)
 
@@ -151,7 +151,7 @@ typedef struct {
     uint32_t exit_status;
     uint8_t application_index;
     uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
     uint16_t action_index;
 } D2H_EVENT_context_switch_run_time_error_event_message_t;
 
@@ -166,7 +166,7 @@ typedef union {
     D2H_EVENT_health_monitor_overcurrent_alert_event_message_t health_monitor_overcurrent_alert_event;
     D2H_EVENT_health_monitor_lcu_ecc_error_event_message_t health_monitor_lcu_ecc_error_event;
     D2H_EVENT_health_monitor_cpu_ecc_event_message_t health_monitor_cpu_ecc_event;
-    D2H_EVENT_context_switch_breakpoint_reached_event_massage_t context_switch_breakpoint_reached_event;
+    D2H_EVENT_context_switch_breakpoint_reached_event_message_t context_switch_breakpoint_reached_event;
     D2H_EVENT_health_monitor_clock_changed_event_message_t health_monitor_clock_changed_event;
     D2H_EVENT_hw_infer_mamager_infer_done_message_t hw_infer_manager_infer_done_event;
     D2H_EVENT_context_switch_run_time_error_event_message_t context_switch_run_time_error_event;
diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h
index b33f5a1b..1aa6bf5d 100644
--- a/common/include/firmware_status.h
+++ b/common/include/firmware_status.h
@@ -527,6 +527,7 @@ Updating rules:
     FIRMWARE_STATUS__X(QSPI_STATUS_MISALIGNED_ADDRESS)\
     FIRMWARE_STATUS__X(QSPI_STATUS_BLOCK_ERASE_FAILED)\
     FIRMWARE_STATUS__X(QSPI_STATUS_CLEAR_AHB_REMAP_FAILED)\
+    FIRMWARE_STATUS__X(QSPI_STATUS_NOT_SUPPORTED)\
     \
     FIRMWARE_MODULE__X(FIRMWARE_MODULE__PCIE_SERVICE)\
     FIRMWARE_STATUS__X(PCIE_SERVICE_STATUS_INVALID_PARAMETERS)\
@@ -763,6 +764,7 @@ Updating rules:
FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_TYPE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_WRITE_DATA_BY_TYPE_ACTION_INVALID_MEMORY_SPACE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_REACHED_TIMEOUT_WHILE_WAITING_FOR_BATCH_SWITCH_CONTEXT_TO_END)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_EXTERNAL_ACTION_LIST_ADDRESS)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\ FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\ @@ -1080,6 +1082,7 @@ Updating rules: FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_FAILED_TO_FIND_STREAM_INDEX)\ FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_NO_CONFIGURED_ACTIONS)\ FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_EXPECTED_HIGHER_BATCH)\ + FIRMWARE_STATUS__X(BURST_CREDITS_TASK_STATUS_TASK_REACHED_TIMEOUT_WAITING_FOR_DEACTIVATION)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__TASK_SYNC_EVENTS)\ FIRMWARE_STATUS__X(TASK_SYNC_EVENTS_STATUS_START_TASK_WHILE_IT_IS_RUNNING)\ @@ -1111,13 +1114,19 @@ Updating rules: FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_RECEIVED_UNEXPECTED_INTERRUPT)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_NETWORK_INDEX)\ FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_KERNEL_DONE_COUNT)\ + FIRMWARE_STATUS__X(CLUSTER_MANAGER_STATUS_INVALID_EXTENSION)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__HW_INFER_MANAGER)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_NOT_CONFIGURED_BEFORE_INFER_START)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_ALREADY_ACTIVATED)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_STATE_MACHINE_NOT_IN_RESET_STATE_BEFORE_DEACTIVATE)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_STATE)\ - + \ + FIRMWARE_MODULE__X(FIRMWARE_MODULE__INFINITE_CONTEXT_LOADER)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_EVENT_BITS_NOT_CLEARED_BEFORE_COPY_CALL)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_TIMEOUT_OCCURED_WAITING_FOR_COPY)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_SUPPORTED)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_MODULE_NOT_INITIALIZED)\ typedef enum { #define FIRMWARE_MODULE__X(module) module, diff --git a/common/include/utils.h b/common/include/utils.h index 860d1fac..c2d50a7d 100644 --- a/common/include/utils.h +++ b/common/include/utils.h @@ -54,6 +54,10 @@ #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) #endif +#ifndef DIV_ROUND_DOWN +#define DIV_ROUND_DOWN(n,d) ((n) / (d)) +#endif + #ifndef ROUND_UNSIGNED_FLOAT #define ROUND_UNSIGNED_FLOAT(n) ((n - (uint32_t)(n)) > 0.5) ? (uint32_t)(n + 1) : (uint32_t)(n) #endif diff --git a/hailort/.gitignore b/hailort/.gitignore index e544d4c6..80fa472a 100644 --- a/hailort/.gitignore +++ b/hailort/.gitignore @@ -1,3 +1,4 @@ +build/ +dist/ /external/ cmake/external/*/ -prepare_externals/build/ diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt index 307878f8..32e115bf 100644 --- a/hailort/CMakeLists.txt +++ b/hailort/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) option(HAILO_BUILD_PYBIND "Build Python binding" OFF) option(HAILO_BUILD_EMULATOR "Build hailort for emulator" OFF) option(HAILO_BUILD_UT "Build Unit Tests" OFF) +option(HAILO_BUILD_DMABUF_TESTS "Build DMA buffer tests. 
Relevant only if HAILO_BUILD_UT is ON" OFF) option(HAILO_BUILD_HW_DEBUG_TOOL "Build hw debug tool" OFF) option(HAILO_BUILD_GSTREAMER "Compile gstreamer plugins" OFF) option(HAILO_BUILD_EXAMPLES "Build examples" OFF) @@ -30,8 +31,8 @@ endif() # Set firmware version add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) -add_definitions( -DFIRMWARE_VERSION_MINOR=16 ) -add_definitions( -DFIRMWARE_VERSION_REVISION=2 ) +add_definitions( -DFIRMWARE_VERSION_MINOR=17 ) +add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) if(HAILO_BUILD_SERVICE) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) endif() diff --git a/hailort/LICENSE-3RD-PARTY.md b/hailort/LICENSE-3RD-PARTY.md index 4868b784..473d5a0d 100644 --- a/hailort/LICENSE-3RD-PARTY.md +++ b/hailort/LICENSE-3RD-PARTY.md @@ -1,16 +1,17 @@ -| Package | Copyright (c) | License | Version | Notes | References | -|:---------------------------------|:----------------------------------|:-------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| -| CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | -| Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | -| protobuf | Google Inc. | BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf | -| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 | -| spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog | -| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly | -| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent | -| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue | -| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter | -| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | -| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | -| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | -| grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc | -| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | \ No newline at end of file +| Package | Copyright (c) | License | Version | Notes | References | +|:---------------------------------|:----------------------------------|:---------------------------|:---------------|:----------------------------------------------|:------------------------------------------------------------------------------| +| CLI11 | University of Cincinnati | 3-Clause BSD | 2.2.0 | Fork | https://github.com/hailo-ai/CLI11 | +| Catch2 | Catch2 Authors | BSL-1.0 | 2.13.7 | Cloned entire package | https://github.com/catchorg/Catch2 | +| protobuf | Google Inc. 
| BSD | 21.12 | Cloned entire package | https://github.com/protocolbuffers/protobuf |
+| pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 |
+| spdlog | Gabi Melman | MIT | 1.6.1 | Cloned entire package | https://github.com/gabime/spdlog |
+| folly | Facebook, Inc. and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly |
+| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent |
+| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue |
+| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter |
+| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git |
+| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 |
+| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git |
+| grpc | Google Inc. | Apache License 2.0 | 1.46.3 | Cloned entire package | https://github.com/grpc/grpc |
+| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb |
+| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen |
\ No newline at end of file
diff --git a/hailort/cmake/external/eigen.cmake b/hailort/cmake/external/eigen.cmake
new file mode 100644
index 00000000..1bb1f66f
--- /dev/null
+++ b/hailort/cmake/external/eigen.cmake
@@ -0,0 +1,28 @@
+cmake_minimum_required(VERSION 3.11.0)
+
+include(FetchContent)
+
+FetchContent_Declare(
+    eigen
+    GIT_REPOSITORY https://gitlab.com/libeigen/eigen
+    GIT_TAG 3147391d946bb4b6c68edd901f2add6ac1f31f8c # Version 3.4.0
+    GIT_SHALLOW TRUE
+    SOURCE_DIR ${HAILO_EXTERNAL_DIR}/eigen-src
+    SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/eigen-subbuild
+)
+
+
+# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent
+FetchContent_GetProperties(eigen)
+if(NOT eigen_POPULATED)
+    FetchContent_Populate(eigen)
+    option(EIGEN_BUILD_DOC OFF)
+    option(BUILD_TESTING OFF)
+    option(EIGEN_LEAVE_TEST_IN_ALL_TARGET OFF)
+    option(EIGEN_BUILD_PKGCONFIG OFF)
+    option(CMAKE_Fortran_COMPILER OFF)
+
+    if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS)
+        add_subdirectory(${eigen_SOURCE_DIR} ${eigen_BINARY_DIR} EXCLUDE_FROM_ALL)
+    endif()
+endif()
\ No newline at end of file
diff --git a/hailort/common/barrier.cpp b/hailort/common/barrier.cpp
index 4342170f..db3c1209 100644
--- a/hailort/common/barrier.cpp
+++ b/hailort/common/barrier.cpp
@@ -36,6 +36,9 @@ void Barrier::arrive_and_wait()
 void Barrier::terminate()
 {
     m_is_activated.store(false);
+    {
+        std::unique_lock<std::mutex> lock(m_mutex);
+    }
     m_cv.notify_all();
 }
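The empty lock scope added to Barrier::terminate() closes a classic lost-wakeup window. A sketch of the interleaving it prevents, assuming arrive_and_wait() checks m_is_activated and then blocks on m_cv under m_mutex:

    // Without the lock/unlock pair:
    //   waiter:     sees m_is_activated == true, is about to call m_cv.wait(lock)
    //   terminate:  m_is_activated.store(false); m_cv.notify_all();  // notification fires early
    //   waiter:     calls m_cv.wait(lock) and sleeps, with no one left to wake it
    // Acquiring m_mutex first forces terminate() to wait until the waiter is actually
    // blocked inside m_cv.wait() (which releases m_mutex), so notify_all() cannot fire
    // in the gap between the waiter's predicate check and its wait.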
diff --git a/hailort/common/device_measurements.cpp b/hailort/common/device_measurements.cpp
index d27966a6..ae15885f 100644
--- a/hailort/common/device_measurements.cpp
+++ b/hailort/common/device_measurements.cpp
@@ -134,7 +134,7 @@ hailo_status PowerMeasurement::start_measurement()
     CHECK_SUCCESS(status, "Failed to start power measurement");
 
     m_is_thread_running = true;
-    m_thread = std::thread([this] () {
+    m_thread = std::thread([this] () -> hailo_status {
         const bool clear_power_measurement_history = true;
         while (m_is_thread_running.load()) {
             std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL);
diff --git a/hailort/common/os/posix/os_utils.cpp b/hailort/common/os/posix/os_utils.cpp
index 6ea226e1..3e73605a 100644
--- a/hailort/common/os/posix/os_utils.cpp
+++ b/hailort/common/os/posix/os_utils.cpp
@@ -100,7 +100,7 @@ size_t OsUtils::get_dma_able_alignment()
     // TODO: implement on qnx (HRT-12356) - only needed when async api is implemented on qnx
     // TODO - URT-13534 - use sys call for QNX OS to get page size
 #elif defined(__QNX__)
-    return OS_UTILS__QNX_PAGE_SIZE
+    return OS_UTILS__QNX_PAGE_SIZE;
 #endif
 }
 
diff --git a/hailort/common/os/posix/socket.cpp b/hailort/common/os/posix/socket.cpp
index 4a964fb2..0260e9be 100644
--- a/hailort/common/os/posix/socket.cpp
+++ b/hailort/common/os/posix/socket.cpp
@@ -230,7 +230,7 @@ hailo_status Socket::send_to(const uint8_t *src_buffer, size_t src_buffer_size,
     } else if (EPIPE == errno) {
         // When socket is aborted from another thread sendto will return errno EPIPE
         LOGGER__INFO("Udp send aborted!");
-        return HAILO_STREAM_ABORTED_BY_USER;
+        return HAILO_STREAM_ABORT;
     } else {
         LOGGER__ERROR("Udp failed to send data, errno:{}.", errno);
         return HAILO_ETH_SEND_FAILURE;
@@ -272,7 +272,7 @@ hailo_status Socket::recv_from(uint8_t *dest_buffer, size_t dest_buffer_size, in
     }
     else if ((0 == number_of_received_bytes) && (0 != dest_buffer_size)) {
         LOGGER__INFO("Udp socket was aborted");
-        return HAILO_STREAM_ABORTED_BY_USER;
+        return HAILO_STREAM_ABORT;
     }
 
     if (result_src_addr_size > src_addr_size) {
diff --git a/hailort/common/runtime_statistics_internal.hpp b/hailort/common/runtime_statistics_internal.hpp
index 707a7079..c089fda8 100644
--- a/hailort/common/runtime_statistics_internal.hpp
+++ b/hailort/common/runtime_statistics_internal.hpp
@@ -15,10 +15,69 @@
 #include
 #include
 #include
+#include <sstream>
+#include <iomanip>
+#include <string>
 
 namespace hailort
 {
 
+class AccumulatorResultsHelper final
+{
+public:
+    AccumulatorResultsHelper() = delete;
+
+    static const uint32_t DEFAULT_FLOATING_POINT_PRECISION = 4;
+
+    static std::string format_results(const AccumulatorResults &results, bool verbose = false,
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        std::stringstream stream;
+        stream << format_statistic(results.count(), "count") << ", ";
+        stream << format_statistic(results.mean(), "mean", precision);
+        if (verbose) {
+            stream << ", ";
+            stream << format_statistic(results.min(), "min", precision) << ", ";
+            stream << format_statistic(results.max(), "max", precision) << ", ";
+            stream << format_statistic(results.var(), "var", precision) << ", ";
+            stream << format_statistic(results.sd(), "sd", precision) << ", ";
+            stream << format_statistic(results.mean_sd(), "mean_sd", precision);
+        }
+        return stream.str();
+    }
+
+    static std::string format_statistic(const Expected<double> &statistic, const std::string &name = "",
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        return format_statistic<double>(statistic, name, precision);
+    }
+
+    static std::string format_statistic(const Expected<size_t> &statistic, const std::string &name = "")
+    {
+        return format_statistic<size_t>(statistic, name);
+    }
+
+private:
+    template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
+    static std::string format_statistic(const Expected<T> &statistic, const std::string &name,
+        uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION)
+    {
+        static const std::string NO_VALUE = "-";
+        std::stringstream stream;
+        if (!name.empty()) {
+            stream << name << "=";
+        }
+
+        if (statistic.has_value()) {
+            stream << std::fixed << std::setprecision(precision) << statistic.value();
+        } else {
+            stream << NO_VALUE;
+        }
+
+        return stream.str();
+    }
+};
+
 template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
 class FullAccumulator : public Accumulator<T>
 {
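For reference, a usage sketch of the helper above (the accumulator getter and the numbers are hypothetical; the output format is exactly what format_results() produces):

    // AccumulatorResults results = latency_accumulator->get();
    // LOGGER__INFO("latency: {}", AccumulatorResultsHelper::format_results(results, true /* verbose */));
    // Possible output: "count=100, mean=2.5310, min=2.1044, max=9.8021, var=0.8312, sd=0.9117, mean_sd=0.0912"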
diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp
index 57046ed5..dfde750f 100644
--- a/hailort/common/utils.hpp
+++ b/hailort/common/utils.hpp
@@ -12,10 +12,13 @@
 #ifndef HAILO_UTILS_H_
 #define HAILO_UTILS_H_
 
-#include <hailo/hailort.h>
-#include <hailo/expected.hpp>
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
 #include "common/logger_macros.hpp"
 
 #include
+
+#include
 #include
 #include
 #include
@@ -166,6 +169,17 @@ _ISEMPTY( \
 
 #define CONSTRUCT_MSG(dft_fmt, ...) _CONSTRUCT_MSG(ISEMPTY(__VA_ARGS__), dft_fmt, "" __VA_ARGS__)
 
+inline hailo_status get_status(hailo_status status)
+{
+    return status;
+}
+
+template <typename T>
+inline hailo_status get_status(const Expected<T> &exp)
+{
+    return exp.status();
+}
+
 #define _CHECK(cond, ret_val, ...) \
     do { \
         if (!(cond)) { \
@@ -175,39 +189,31 @@ _ISEMPTY( \
     } while(0)
 
 /** Returns ret_val when cond is false */
-#define CHECK(cond, ret_val, ...) _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__))
-#define CHECK_AS_EXPECTED(cond, ret_val, ...) \
-    _CHECK((cond), (make_unexpected(ret_val)), CONSTRUCT_MSG("CHECK_AS_EXPECTED failed", ##__VA_ARGS__))
-
-#define CHECK_ARG_NOT_NULL(arg) _CHECK(nullptr != (arg), HAILO_INVALID_ARGUMENT, "CHECK_ARG_NOT_NULL for {} failed", #arg)
+#define CHECK(cond, ret_val, ...) \
+    _CHECK((cond), make_unexpected(ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__))
+#define CHECK_AS_EXPECTED CHECK
 
-#define CHECK_ARG_NOT_NULL_AS_EXPECTED(arg) _CHECK(nullptr != (arg), make_unexpected(HAILO_INVALID_ARGUMENT), "CHECK_ARG_NOT_NULL_AS_EXPECTED for {} failed", #arg)
+#define CHECK_ARG_NOT_NULL(arg) _CHECK(nullptr != (arg), make_unexpected(HAILO_INVALID_ARGUMENT), "CHECK_ARG_NOT_NULL for {} failed", #arg)
+#define CHECK_ARG_NOT_NULL_AS_EXPECTED CHECK_ARG_NOT_NULL
 
-#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), status, "CHECK_NOT_NULL for {} failed", #arg)
+#define CHECK_NOT_NULL(arg, status) _CHECK(nullptr != (arg), make_unexpected(status), "CHECK_NOT_NULL for {} failed", #arg)
+#define CHECK_NOT_NULL_AS_EXPECTED CHECK_NOT_NULL
 
-#define CHECK_NOT_NULL_AS_EXPECTED(arg, status) _CHECK(nullptr != (arg), make_unexpected(status), "CHECK_NOT_NULL_AS_EXPECTED for {} failed", #arg)
-
-#define _CHECK_SUCCESS(status, is_default, fmt, ...) \
+#define _CHECK_SUCCESS(res, is_default, fmt, ...) \
     do { \
-        const auto &__check_success_status = (status); \
+        const auto &__check_success_status = get_status(res); \
         _CHECK( \
-            HAILO_SUCCESS == __check_success_status, \
-            __check_success_status, \
+            (HAILO_SUCCESS == __check_success_status), \
+            make_unexpected(__check_success_status), \
             _CONSTRUCT_MSG(is_default, "CHECK_SUCCESS failed with status={}", fmt, __check_success_status, ##__VA_ARGS__) \
         ); \
     } while(0)
 #define CHECK_SUCCESS(status, ...) _CHECK_SUCCESS(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define CHECK_SUCCESS_AS_EXPECTED CHECK_SUCCESS
 
-#define _CHECK_SUCCESS_AS_EXPECTED(status, is_default, fmt, ...) \
-    do { \
-        const auto &__check_success_status = (status); \
-        _CHECK( \
-            HAILO_SUCCESS == __check_success_status, \
-            make_unexpected(__check_success_status), \
-            _CONSTRUCT_MSG(is_default, "CHECK_SUCCESS_AS_EXPECTED failed with status={}", fmt, __check_success_status, ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_SUCCESS_AS_EXPECTED(status, ...) _CHECK_SUCCESS_AS_EXPECTED(status, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
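The get_status() overloads are what allow one macro to serve both variants: _CHECK_SUCCESS now accepts either a hailo_status or an Expected<T>. Note that the unified macros return make_unexpected() even in status-returning functions, which implies hailort's Unexpected converts back to hailo_status (otherwise the pervasive uses of CHECK in such functions could not compile). A usage sketch; do_work() and make_buffer() are hypothetical:

    hailo_status example()
    {
        hailo_status status = do_work();          // hypothetical call returning a status
        CHECK_SUCCESS(status);                    // plain status path

        Expected<Buffer> buffer = make_buffer();  // hypothetical call returning an Expected
        CHECK_SUCCESS(buffer);                    // Expected<T> path, via get_status(exp)

        return HAILO_SUCCESS;
    }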
+#define _CHECK_EXPECTED _CHECK_SUCCESS
+#define CHECK_EXPECTED(obj, ...) _CHECK_EXPECTED(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define CHECK_EXPECTED_AS_STATUS CHECK_EXPECTED
 
 // Define macro CHECK_IN_DEBUG - that checks cond in debug with CHECK macro but in release does nothing and will get optimized out
 #ifdef NDEBUG
@@ -258,28 +264,30 @@ _ISEMPTY( \
 #define CHECK_GRPC_STATUS_AS_EXPECTED(status) _CHECK_GRPC_STATUS(status, make_unexpected(HAILO_RPC_FAILED), SERVICE_WARNING_MSG)
 #endif
 
-#define _CHECK_EXPECTED(obj, is_default, fmt, ...) \
-    do { \
-        const auto &__check_expected_obj = (obj); \
-        _CHECK( \
-            __check_expected_obj.has_value(), \
-            make_unexpected(__check_expected_obj.status()), \
-            _CONSTRUCT_MSG(is_default, "CHECK_EXPECTED failed with status={}", fmt, __check_expected_obj.status(), ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_EXPECTED(obj, ...) _CHECK_EXPECTED(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+#define __HAILO_CONCAT(x, y) x ## y
+#define _HAILO_CONCAT(x, y) __HAILO_CONCAT(x, y)
 
+#define _TRY(expected_var_name, var_decl, expr, ...) \
+    auto expected_var_name = (expr); \
+    CHECK_EXPECTED(expected_var_name, __VA_ARGS__); \
+    var_decl = expected_var_name.release()
 
-#define _CHECK_EXPECTED_AS_STATUS(obj, is_default, fmt, ...) \
-    do { \
-        const auto &__check_expected_obj = (obj); \
-        _CHECK( \
-            __check_expected_obj.has_value(), \
-            __check_expected_obj.status(), \
-            _CONSTRUCT_MSG(is_default, "CHECK_EXPECTED_AS_STATUS failed with status={}", fmt, __check_expected_obj.status(), ##__VA_ARGS__) \
-        ); \
-    } while(0)
-#define CHECK_EXPECTED_AS_STATUS(obj, ...) _CHECK_EXPECTED_AS_STATUS(obj, ISEMPTY(__VA_ARGS__), "" __VA_ARGS__)
+/**
+ * The TRY macro is used to allow easier validation and access for variables returned as Expected.
+ * If the expression returns an Expected with status HAILO_SUCCESS, the macro will release the expected and assign
+ * the var_decl.
+ * Otherwise, the macro will cause the current function to return the failed status.
+ *
+ * Usage example:
+ *
+ * Expected<int> func() {
+ *     TRY(auto var, return_5());
+ *     // Now var is int with value 5
+ *
+ *     // func will return Unexpected with status HAILO_INTERNAL_FAILURE
+ *     TRY(auto var2, return_error(HAILO_INTERNAL_FAILURE), "Failed doing stuff {}", 5);
+ * }
+ */
+#define TRY(var_decl, expr, ...) 
_TRY(_HAILO_CONCAT(__expected, __COUNTER__), var_decl, expr, __VA_ARGS__) #ifndef _MSC_VER #define IGNORE_DEPRECATION_WARNINGS_BEGIN _Pragma("GCC diagnostic push") \ diff --git a/hailort/drivers/common/hailo_ioctl_common.h b/hailort/drivers/common/hailo_ioctl_common.h index 4d7dc6c4..0911f422 100644 --- a/hailort/drivers/common/hailo_ioctl_common.h +++ b/hailort/drivers/common/hailo_ioctl_common.h @@ -14,7 +14,10 @@ #define SIZE_OF_VDMA_DESCRIPTOR (16) #define VDMA_DEST_CHANNELS_START (16) -#define CHANNEL_IRQ_TIMESTAMPS_SIZE (128 * 2) // Should be same as MAX_IRQ_TIMESTAMPS_SIZE (hailort_driver.hpp) +#define HAILO_VDMA_MAX_ONGOING_TRANSFERS (128) +#define HAILO_VDMA_MAX_ONGOING_TRANSFERS_MASK (HAILO_VDMA_MAX_ONGOING_TRANSFERS - 1) + +#define CHANNEL_IRQ_TIMESTAMPS_SIZE (HAILO_VDMA_MAX_ONGOING_TRANSFERS * 2) #define CHANNEL_IRQ_TIMESTAMPS_SIZE_MASK (CHANNEL_IRQ_TIMESTAMPS_SIZE - 1) #define INVALID_DRIVER_HANDLE_VALUE ((uintptr_t)-1) @@ -35,14 +38,13 @@ typedef ULONG uint32_t; typedef UCHAR uint8_t; typedef USHORT uint16_t; typedef ULONGLONG uint64_t; -typedef uint64_t u64; -typedef uint32_t u32; -typedef uint16_t u16; -typedef uint8_t u8; #endif /* !defined(__cplusplus) && defined(NTDDI_VERSION) */ #ifdef _MSC_VER + +#include + #if !defined(bool) && !defined(__cplusplus) typedef uint8_t bool; #endif // !defined(bool) && !defined(__cplusplus) @@ -51,6 +53,48 @@ typedef uint8_t bool; #define INT_MAX 0x7FFFFFFF #endif // !defined(INT_MAX) + +// {d88d31f1-fede-4e71-ac2a-6ce0018c1501} +DEFINE_GUID (GUID_DEVINTERFACE_HailoKM, + 0xd88d31f1,0xfede,0x4e71,0xac,0x2a,0x6c,0xe0,0x01,0x8c,0x15,0x01); + +#define HAILO_GENERAL_IOCTL_MAGIC 0 +#define HAILO_VDMA_IOCTL_MAGIC 1 +#define HAILO_NON_LINUX_IOCTL_MAGIC 2 + +#define HAILO_IOCTL_COMPATIBLE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x802, METHOD_BUFFERED, FILE_ANY_ACCESS) + + +typedef struct tCompatibleHailoIoctlParam +{ + union { + struct { + ULONG Size : 16; + ULONG Code : 8; + ULONG Type : 6; + ULONG Read : 1; + ULONG Write : 1; + } bits; + ULONG value; + } u; +} tCompatibleHailoIoctlParam; + +static ULONG FORCEINLINE _IOC_(ULONG nr, ULONG type, ULONG size, bool read, bool write) +{ + struct tCompatibleHailoIoctlParam param; + param.u.bits.Code = nr; + param.u.bits.Size = size; + param.u.bits.Type = type; + param.u.bits.Read = read ? 1 : 0; + param.u.bits.Write = write ? 
1 : 0;
+    return param.u.value;
+}
+
+#define _IOW_(type,nr,size) _IOC_(nr, type, sizeof(size), true, false)
+#define _IOR_(type,nr,size) _IOC_(nr, type, sizeof(size), false, true)
+#define _IOWR_(type,nr,size) _IOC_(nr, type, sizeof(size), true, true)
+#define _IO_(type,nr) _IOC_(nr, type, 0, false, false)
+
 #elif defined(__linux__) // #ifdef _MSC_VER
 #ifndef __KERNEL__
 // include the userspace headers only if this file is included by user space program
@@ -149,11 +193,17 @@ struct hailo_vdma_buffer_unmap_params {
 
 /* structure used in ioctl HAILO_DESC_LIST_CREATE */
 struct hailo_desc_list_create_params {
     size_t desc_count;          // in
+    uint16_t desc_page_size;    // in
     bool is_circular;           // in
     uintptr_t desc_handle;      // out
     uint64_t dma_address;       // out
 };
 
+/* structure used in ioctl HAILO_DESC_LIST_RELEASE */
+struct hailo_desc_list_release_params {
+    uintptr_t desc_handle;      // in
+};
+
 /* structure used in ioctl HAILO_NON_LINUX_DESC_LIST_MMAP */
 struct hailo_non_linux_desc_list_mmap_params {
     uintptr_t desc_handle;  // in
@@ -164,8 +214,9 @@ struct hailo_non_linux_desc_list_mmap_params {
 /* structure used in ioctl HAILO_DESC_LIST_BIND_VDMA_BUFFER */
 struct hailo_desc_list_bind_vdma_buffer_params {
     size_t buffer_handle;       // in
+    size_t buffer_size;         // in
+    size_t buffer_offset;       // in
     uintptr_t desc_handle;      // in
-    uint16_t desc_page_size;    // in
     uint8_t channel_index;      // in
     uint32_t starting_desc;     // in
 };
@@ -189,6 +240,7 @@ struct hailo_vdma_interrupts_channel_data {
     uint16_t host_num_processed;
     uint8_t host_error;             // Channel errors bits on source side
     uint8_t device_error;           // Channel errors bits on dest side
+    bool validation_success;        // Whether the validation of the channel was successful
 };
 
 struct hailo_vdma_interrupts_wait_params {
@@ -272,26 +324,6 @@ struct hailo_memory_transfer_params {
     uint8_t buffer[MAX_MEMORY_TRANSFER_LENGTH];     // in/out
 };
 
-/* structure used in ioctl HAILO_VDMA_CHANNEL_READ_REGISTER */
-struct hailo_vdma_channel_read_register_params {
-    uint8_t engine_index;                   // in
-    uint8_t channel_index;                  // in
-    enum hailo_dma_data_direction direction; // in
-    size_t offset;                          // in
-    size_t reg_size;                        // in, can be either 1, 2 or 4
-    uint32_t data;                          // out
-};
-
-/* structure used in ioctl HAILO_VDMA_CHANNEL_WRITE_REGISTER */
-struct hailo_vdma_channel_write_register_params {
-    uint8_t engine_index;                   // in
-    uint8_t channel_index;                  // in
-    enum hailo_dma_data_direction direction; // in
-    size_t offset;                          // in
-    size_t reg_size;                        // in, can be either 1, 2 or 4
-    uint32_t data;                          // in
-};
-
 /* structure used in ioctl HAILO_VDMA_BUFFER_SYNC */
 enum hailo_vdma_buffer_sync_type {
     HAILO_SYNC_FOR_CPU,
@@ -362,21 +394,103 @@ struct hailo_read_log_params {
     size_t read_bytes;      // out
 };
 
+/* structure used in ioctl HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC */
 struct hailo_allocate_low_memory_buffer_params {
     size_t buffer_size;         // in
     uintptr_t buffer_handle;    // out
 };
 
+/* structure used in ioctl HAILO_VDMA_LOW_MEMORY_BUFFER_FREE */
+struct hailo_free_low_memory_buffer_params {
+    uintptr_t buffer_handle;    // in
+};
+
 struct hailo_mark_as_in_use_params {
     bool in_use;                // out
 };
 
+/* structure used in ioctl HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC */
 struct hailo_allocate_continuous_buffer_params {
     size_t buffer_size;         // in
     uintptr_t buffer_handle;    // out
     uint64_t dma_address;       // out
 };
 
+/* structure used in ioctl HAILO_VDMA_CONTINUOUS_BUFFER_FREE */
+struct hailo_free_continuous_buffer_params {
+    uintptr_t buffer_handle;    // in
+};
+
+/* structures used in ioctl HAILO_VDMA_LAUNCH_TRANSFER */
+struct hailo_vdma_transfer_buffer {
+    size_t mapped_buffer_handle;    // in
+    uint32_t offset;                // in
+    uint32_t size;                  // in
+};
+
+enum hailo_vdma_interrupts_domain {
+    HAILO_VDMA_INTERRUPTS_DOMAIN_NONE   = 0,
+    HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE = (1 << 0),
+    HAILO_VDMA_INTERRUPTS_DOMAIN_HOST   = (1 << 1),
+
+    /** Max enum value to maintain ABI Integrity */
+    HAILO_VDMA_INTERRUPTS_DOMAIN_MAX_ENUM = INT_MAX,
+};
+
+// We allow maximum 2 buffers per transfer since we may have an extra buffer
+// to make sure each buffer is aligned to page size.
+#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (2)
+
+struct hailo_vdma_launch_transfer_params {
+    uint8_t engine_index;           // in
+    uint8_t channel_index;          // in
+
+    uintptr_t desc_handle;          // in
+    uint32_t starting_desc;         // in
+
+    bool should_bind;               // in, if false, assumes buffer already bound.
+    uint8_t buffers_count;          // in
+    struct hailo_vdma_transfer_buffer
+        buffers[HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER]; // in
+
+    enum hailo_vdma_interrupts_domain first_interrupts_domain; // in
+    enum hailo_vdma_interrupts_domain last_interrupts_domain;  // in
+
+    bool is_debug;                  // in, if set, program hw to send
+                                    // more info (e.g. desc complete status)
+
+    uint32_t descs_programed;       // out, number of descriptors programmed.
+};
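A sketch of how user space might drive the new launch-transfer ioctl (Linux side; the file descriptor and handles are hypothetical and would come from earlier HAILO_DESC_LIST_CREATE / HAILO_VDMA_BUFFER_MAP calls):

    #include <sys/ioctl.h>

    static int launch_one_transfer(int fd, uintptr_t desc_handle, size_t buf_handle, uint32_t transfer_size)
    {
        struct hailo_vdma_launch_transfer_params params = {0};
        params.engine_index = 0;
        params.channel_index = 0;
        params.desc_handle = desc_handle;
        params.starting_desc = 0;
        params.should_bind = true;                 /* bind the buffer as part of the launch */
        params.buffers_count = 1;
        params.buffers[0].mapped_buffer_handle = buf_handle;
        params.buffers[0].offset = 0;
        params.buffers[0].size = transfer_size;
        params.first_interrupts_domain = HAILO_VDMA_INTERRUPTS_DOMAIN_NONE;
        params.last_interrupts_domain = HAILO_VDMA_INTERRUPTS_DOMAIN_HOST; /* interrupt the host on completion */
        params.is_debug = false;
        return ioctl(fd, HAILO_VDMA_LAUNCH_TRANSFER, &params);
    }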
+#ifdef _MSC_VER
+struct tCompatibleHailoIoctlData
+{
+    tCompatibleHailoIoctlParam Parameters;
+    ULONG_PTR Value;
+    union {
+        struct hailo_memory_transfer_params MemoryTransfer;
+        struct hailo_vdma_interrupts_enable_params VdmaInterruptsEnable;
+        struct hailo_vdma_interrupts_disable_params VdmaInterruptsDisable;
+        struct hailo_vdma_interrupts_read_timestamp_params VdmaInterruptsReadTimestamps;
+        struct hailo_vdma_interrupts_wait_params VdmaInterruptsWait;
+        struct hailo_vdma_buffer_sync_params VdmaBufferSync;
+        struct hailo_fw_control FirmwareControl;
+        struct hailo_vdma_buffer_map_params VdmaBufferMap;
+        struct hailo_vdma_buffer_unmap_params VdmaBufferUnmap;
+        struct hailo_desc_list_create_params DescListCreate;
+        struct hailo_desc_list_release_params DescListReleaseParam;
+        struct hailo_desc_list_bind_vdma_buffer_params DescListBind;
+        struct hailo_d2h_notification D2HNotification;
+        struct hailo_device_properties DeviceProperties;
+        struct hailo_driver_info DriverInfo;
+        struct hailo_non_linux_desc_list_mmap_params DescListMmap;
+        struct hailo_read_log_params ReadLog;
+        struct hailo_mark_as_in_use_params MarkAsInUse;
+        struct hailo_vdma_launch_transfer_params LaunchTransfer;
+    } Buffer;
+};
+#endif // _MSC_VER
+
 #pragma pack(pop)
 
 enum hailo_general_ioctl_code {
@@ -407,8 +521,6 @@ enum hailo_vdma_ioctl_code {
     HAILO_VDMA_INTERRUPTS_DISABLE_CODE,
     HAILO_VDMA_INTERRUPTS_WAIT_CODE,
     HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS_CODE,
-    HAILO_VDMA_CHANNEL_READ_REGISTER_CODE,
-    HAILO_VDMA_CHANNEL_WRITE_REGISTER_CODE,
     HAILO_VDMA_BUFFER_MAP_CODE,
     HAILO_VDMA_BUFFER_UNMAP_CODE,
     HAILO_VDMA_BUFFER_SYNC_CODE,
@@ -420,6 +532,7 @@ enum hailo_vdma_ioctl_code {
     HAILO_MARK_AS_IN_USE_CODE,
     HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE,
     HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE,
+    HAILO_VDMA_LAUNCH_TRANSFER_CODE,
 
     // Must be last
     HAILO_VDMA_IOCTL_MAX_NR,
@@ -430,24 +543,23 @@ enum hailo_vdma_ioctl_code {
 #define HAILO_VDMA_INTERRUPTS_WAIT            _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_INTERRUPTS_WAIT_CODE, struct hailo_vdma_interrupts_wait_params)
 #define HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS_CODE, struct hailo_vdma_interrupts_read_timestamp_params)
 
-#define HAILO_VDMA_CHANNEL_READ_REGISTER      _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CHANNEL_READ_REGISTER_CODE, 
struct hailo_vdma_channel_read_register_params) -#define HAILO_VDMA_CHANNEL_WRITE_REGISTER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CHANNEL_WRITE_REGISTER_CODE, struct hailo_vdma_channel_write_register_params) - #define HAILO_VDMA_BUFFER_MAP _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_MAP_CODE, struct hailo_vdma_buffer_map_params) #define HAILO_VDMA_BUFFER_UNMAP _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_UNMAP_CODE, struct hailo_vdma_buffer_unmap_params) #define HAILO_VDMA_BUFFER_SYNC _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_BUFFER_SYNC_CODE, struct hailo_vdma_buffer_sync_params) #define HAILO_DESC_LIST_CREATE _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_CREATE_CODE, struct hailo_desc_list_create_params) -#define HAILO_DESC_LIST_RELEASE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_RELEASE_CODE, uintptr_t) +#define HAILO_DESC_LIST_RELEASE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_RELEASE_CODE, struct hailo_desc_list_release_params) #define HAILO_DESC_LIST_BIND_VDMA_BUFFER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_DESC_LIST_BIND_VDMA_BUFFER_CODE, struct hailo_desc_list_bind_vdma_buffer_params) #define HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC_CODE, struct hailo_allocate_low_memory_buffer_params) -#define HAILO_VDMA_LOW_MEMORY_BUFFER_FREE _IO_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE_CODE) +#define HAILO_VDMA_LOW_MEMORY_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE_CODE, struct hailo_free_low_memory_buffer_params) #define HAILO_MARK_AS_IN_USE _IOW_(HAILO_VDMA_IOCTL_MAGIC, HAILO_MARK_AS_IN_USE_CODE, struct hailo_mark_as_in_use_params) #define HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE, struct hailo_allocate_continuous_buffer_params) -#define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IO_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE) +#define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE, struct hailo_free_continuous_buffer_params) + +#define HAILO_VDMA_LAUNCH_TRANSFER _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params) enum hailo_non_linux_ioctl_code { diff --git a/hailort/drivers/win/include/Public.h b/hailort/drivers/win/include/Public.h index 8dfecf63..6181033e 100644 --- a/hailort/drivers/win/include/Public.h +++ b/hailort/drivers/win/include/Public.h @@ -15,13 +15,9 @@ Module Name: --*/ -// -// Define an Interface Guid so that apps can find the device and talk to it. 
-// +#ifndef _HAILO_PUBLIC_H_ +#define _HAILO_PUBLIC_H_ -DEFINE_GUID (GUID_DEVINTERFACE_HailoKM, - 0xd88d31f1,0xfede,0x4e71,0xac,0x2a,0x6c,0xe0,0x01,0x8c,0x15,0x01); -// {d88d31f1-fede-4e71-ac2a-6ce0018c1501} #define HAILO_IOCTL_COMMON CTL_CODE(FILE_DEVICE_UNKNOWN, 0x801, METHOD_BUFFERED, FILE_ANY_ACCESS) #define IOCTL_FUNC(x) (((x) >> 2) & 0xfff) @@ -57,69 +53,7 @@ struct tCommonHailoIoctlParam #define HAILO_CMD_FREE_MEMORY 0x0060 #define HAILO_CMD_ALLOC_MEMORY 0x0061 -#define HAILO_IOCTL_COMPATIBLE CTL_CODE(FILE_DEVICE_UNKNOWN, 0x802, METHOD_BUFFERED, FILE_ANY_ACCESS) -struct tCompatibleHailoIoctlParam -{ - union { - struct { - ULONG Size : 16; - ULONG Code : 8; - ULONG Type : 6; - ULONG Read : 1; - ULONG Write : 1; - } bits; - ULONG value; - } u; -}; - -#define HAILO_GENERAL_IOCTL_MAGIC 0 -#define HAILO_VDMA_IOCTL_MAGIC 1 -#define HAILO_NON_LINUX_IOCTL_MAGIC 2 - - - -static ULONG FORCEINLINE _IOC_(ULONG nr, ULONG type, ULONG size, bool read, bool write) -{ - tCompatibleHailoIoctlParam param; - param.u.bits.Code = nr; - param.u.bits.Size = size; - param.u.bits.Type = type; - param.u.bits.Read = read ? 1 : 0; - param.u.bits.Write = write ? 1 : 0; - return param.u.value; -} - -#define _IOW_(type,nr,size) _IOC_(nr, type, sizeof(size), true, false) -#define _IOR_(type,nr,size) _IOC_(nr, type, sizeof(size), false, true) -#define _IOWR_(type,nr,size) _IOC_(nr, type, sizeof(size), true, true) -#define _IO_(type,nr) _IOC_(nr, type, 0, false, false) #include "..\..\common\hailo_ioctl_common.h" -struct tCompatibleHailoIoctlData -{ - tCompatibleHailoIoctlParam Parameters; - ULONG_PTR Value; - union { - hailo_memory_transfer_params MemoryTransfer; - hailo_vdma_interrupts_enable_params VdmaInterruptsEnable; - hailo_vdma_interrupts_disable_params VdmaInterruptsDisable; - hailo_vdma_interrupts_read_timestamp_params VdmaInterruptsReadTimestamps; - hailo_vdma_interrupts_wait_params VdmaInterruptsWait; - hailo_vdma_buffer_sync_params VdmaBufferSync; - hailo_fw_control FirmwareControl; - hailo_vdma_buffer_map_params VdmaBufferMap; - hailo_vdma_buffer_unmap_params VdmaBufferUnmap; - hailo_desc_list_create_params DescListCreate; - uintptr_t DescListReleaseParam; - hailo_desc_list_bind_vdma_buffer_params DescListBind; - hailo_d2h_notification D2HNotification; - hailo_device_properties DeviceProperties; - hailo_driver_info DriverInfo; - hailo_vdma_channel_read_register_params ChannelRegisterRead; - hailo_vdma_channel_write_register_params ChannelRegisterWrite; - hailo_non_linux_desc_list_mmap_params DescListMmap; - hailo_read_log_params ReadLog; - hailo_mark_as_in_use_params MarkAsInUse; - } Buffer; -}; +#endif /* _HAILO_PUBLIC_H_ */ \ No newline at end of file diff --git a/hailort/hailort_service/CMakeLists.txt b/hailort/hailort_service/CMakeLists.txt index d302b2ee..a470a3dd 100644 --- a/hailort/hailort_service/CMakeLists.txt +++ b/hailort/hailort_service/CMakeLists.txt @@ -12,6 +12,7 @@ endif() add_executable(hailort_service hailort_rpc_service.cpp + cng_buffer_pool.cpp service_resource_manager.hpp ${HAILORT_SERVICE_OS_DIR}/hailort_service.cpp ${HAILORT_COMMON_CPP_SOURCES} diff --git a/hailort/hailort_service/cng_buffer_pool.cpp b/hailort/hailort_service/cng_buffer_pool.cpp new file mode 100644 index 00000000..b0541646 --- /dev/null +++ b/hailort/hailort_service/cng_buffer_pool.cpp @@ -0,0 +1,163 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file cng_buffer_pool.cpp
+ * @brief Network group buffer pool implementation
+ **/
+
+#include "cng_buffer_pool.hpp"
+#include "service_resource_manager.hpp"
+#include "hailo/hailort.h"
+
+namespace hailort
+{
+
+
+Expected<std::shared_ptr<ServiceStreamBufferPool>> ServiceStreamBufferPool::create(uint32_t vdevice_handle,
+    size_t buffer_size, size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event)
+{
+    auto map_buffer_lambda = [direction](std::shared_ptr<VDevice> vdevice, BufferPtr buffer) {
+        return DmaMappedBuffer::create(*vdevice, buffer->data(), buffer->size(), direction);
+    };
+    auto &vdevice_manager = ServiceResourceManager<VDevice>::get_instance();
+
+    auto free_buffers_queue = SpscQueue<BufferPtr>::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT);
+    CHECK_EXPECTED(free_buffers_queue);
+
+    std::vector<AllocatedMappedBuffer> buffers;
+    buffers.reserve(buffer_count);
+    for (size_t i = 0; i < buffer_count; i++) {
+        auto buffer = Buffer::create_shared(buffer_size, BufferStorageParams::create_dma());
+        CHECK_EXPECTED(buffer);
+
+        auto mapped_buffer = vdevice_manager.execute<Expected<DmaMappedBuffer>>(vdevice_handle, map_buffer_lambda, buffer.value());
+        CHECK_EXPECTED(mapped_buffer);
+
+        auto status = free_buffers_queue->enqueue(buffer.value());
+        CHECK_SUCCESS(status);
+
+        buffers.emplace_back(AllocatedMappedBuffer{ buffer.release(), mapped_buffer.release()});
+    }
+
+    auto buffer_pool_ptr = make_shared_nothrow<ServiceStreamBufferPool>(buffer_size, std::move(buffers),
+        free_buffers_queue.release(), buffer_count);
+    CHECK_NOT_NULL_AS_EXPECTED(buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return buffer_pool_ptr;
+}
+
+ServiceStreamBufferPool::ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
+    SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count) :
+    m_buffer_size(buffer_size),
+    m_buffers_count(buffers_count),
+    m_buffers(std::move(buffers)),
+    m_free_buffers_queue(std::move(free_buffers_queue))
+{}
+
+Expected<BufferPtr> ServiceStreamBufferPool::acquire_buffer()
+{
+    auto buffer = m_free_buffers_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT);
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        return make_unexpected(buffer.status());
+    }
+    else if (HAILO_TIMEOUT == buffer.status()) {
+        LOGGER__WARNING(
+            "Failed to acquire buffer because the buffer pool is empty. This could be caused by uneven reading and writing speeds");
+        return make_unexpected(buffer.status());
+    }
+    CHECK_EXPECTED(buffer);
+
+    return buffer.release();
+}
+hailo_status ServiceStreamBufferPool::return_to_pool(BufferPtr buffer)
+{
+    CHECK(buffer->size() == m_buffer_size, HAILO_INTERNAL_FAILURE,
+        "Buffer size is not the same as expected for pool! ({} != {})", buffer->size(), m_buffer_size);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto status = m_free_buffers_queue.enqueue(buffer);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+size_t ServiceStreamBufferPool::buffers_count()
+{
+    return m_buffers_count;
+}
+
+Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    auto cng_buffer_pool_ptr = make_shared_nothrow<ServiceNetworkGroupBufferPool>(shutdown_event, vdevice_handle);
+    CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    return cng_buffer_pool_ptr;
+}
+
+ServiceNetworkGroupBufferPool::ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle) :
+    m_output_name_to_buffer_pool(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle)
+{}
+
+hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &name, size_t frame_size, size_t pool_size)
+{
+    auto buffer_pool = ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
+        pool_size, HAILO_DMA_BUFFER_DIRECTION_D2H, m_shutdown_event);
+    CHECK_EXPECTED(buffer_pool);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    m_output_name_to_buffer_pool[name] = buffer_pool.release();
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &name, size_t frame_size)
+{
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto pool_size = m_output_name_to_buffer_pool[name]->buffers_count();
+    m_output_name_to_buffer_pool[name].reset();
+
+    auto buffer_pool = ServiceStreamBufferPool::create(m_vdevice_handle, frame_size,
+        pool_size, HAILO_DMA_BUFFER_DIRECTION_D2H, m_shutdown_event);
+    CHECK_EXPECTED(buffer_pool);
+    m_output_name_to_buffer_pool[name] = buffer_pool.release();
+
+    return HAILO_SUCCESS;
+}
+
+Expected<BufferPtr> ServiceNetworkGroupBufferPool::acquire_buffer(const std::string &output_name)
+{
+    CHECK_AS_EXPECTED(contains(m_output_name_to_buffer_pool, output_name), HAILO_INTERNAL_FAILURE,
+        "acquire_buffer() for output {} failed, output name does not exist in buffer pool", output_name);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto buffer = m_output_name_to_buffer_pool.at(output_name)->acquire_buffer();
+    CHECK_EXPECTED(buffer);
+
+    return buffer.release();
+}
+
+hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &output_name, BufferPtr buffer)
+{
+    CHECK(contains(m_output_name_to_buffer_pool, output_name), HAILO_INTERNAL_FAILURE,
+        "return_to_pool() for output {} failed, output name does not exist in buffer pool", output_name);
+
+    std::unique_lock<std::mutex> lock(m_mutex);
+    auto status = m_output_name_to_buffer_pool.at(output_name)->return_to_pool(buffer);
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status ServiceNetworkGroupBufferPool::shutdown()
+{
+    return m_shutdown_event->signal();
+}
+
+} /* namespace hailort */
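Taken together, the intended lifecycle of one D2H frame through these pools looks roughly like this (a sketch using the names defined above; the async read itself is schematic):

    // auto pool = ServiceNetworkGroupBufferPool::create(vdevice_handle).release();
    // pool->allocate_pool("output0", hw_frame_size, pool_size);    // once, at configure time
    //
    // auto buffer = pool->acquire_buffer("output0").release();     // blocks up to DEFAULT_TRANSFER_TIMEOUT
    // ... launch the async read into *buffer; when the transfer completes ...
    // pool->return_to_pool("output0", buffer);                     // back to the SPSC free queue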
diff --git a/hailort/hailort_service/cng_buffer_pool.hpp b/hailort/hailort_service/cng_buffer_pool.hpp
new file mode 100644
index 00000000..86172a3c
--- /dev/null
+++ b/hailort/hailort_service/cng_buffer_pool.hpp
@@ -0,0 +1,88 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file cng_buffer_pool.hpp
+ * @brief This module represents the buffer pools for the output reads of each network group. Used in the async API.
+ **/
+
+#ifndef _HAILO_CNG_BUFFER_POOL_HPP_
+#define _HAILO_CNG_BUFFER_POOL_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/hailort_common.hpp"
+#include "hailo/buffer.hpp"
+#include "hailo/vdevice.hpp"
+#include "hailo/dma_mapped_buffer.hpp"
+#include "utils/thread_safe_queue.hpp"
+
+namespace hailort
+{
+
+class ServiceStreamBufferPool
+{
+public:
+    static Expected<std::shared_ptr<ServiceStreamBufferPool>> create(uint32_t vdevice_handle, size_t buffer_size,
+        size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event);
+
+    struct AllocatedMappedBuffer {
+        BufferPtr buffer;
+        DmaMappedBuffer mapped_buffer;
+    };
+
+    ServiceStreamBufferPool(size_t buffer_size, std::vector<AllocatedMappedBuffer> &&buffers,
+        SpscQueue<BufferPtr> &&free_buffers_queue, size_t buffers_count);
+    virtual ~ServiceStreamBufferPool() = default;
+
+    Expected<BufferPtr> acquire_buffer();
+    hailo_status return_to_pool(BufferPtr buffer);
+    size_t buffers_count();
+
+private:
+
+    size_t m_buffer_size;
+    size_t m_buffers_count;
+    std::vector<AllocatedMappedBuffer> m_buffers;
+    SpscQueue<BufferPtr> m_free_buffers_queue;
+    std::mutex m_mutex;
+};
+
+using BufferPoolPtr = std::shared_ptr<ServiceStreamBufferPool>;
+using output_name_t = std::string;
+
+// This object holds a buffer pool for each output stream of the network group.
+// It is used to pre-allocate all the buffers necessary for the reads from the device.
+// The buffers are reusable, which also prevents allocation during inference.
+// The buffers are mapped to the device during their creation, which prevents lazy mapping on each frame's inference.
+// Currently only used in the async API.
+class ServiceNetworkGroupBufferPool
+{
+public:
+    static Expected<std::shared_ptr<ServiceNetworkGroupBufferPool>> create(uint32_t vdevice_handle);
+
+    hailo_status allocate_pool(const std::string &name, size_t frame_size, size_t pool_size);
+    // Used in order to reallocate the pool buffers with a different frame_size
+    hailo_status reallocate_pool(const std::string &name, size_t frame_size);
+
+    ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete;
+    ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete;
+    ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete;
+    ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete;
+    virtual ~ServiceNetworkGroupBufferPool() = default;
+
+    ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle);
+    Expected<BufferPtr> acquire_buffer(const std::string &output_name);
+    hailo_status return_to_pool(const std::string &output_name, BufferPtr buffer);
+    hailo_status shutdown();
+
+private:
+    std::unordered_map<output_name_t, BufferPoolPtr> m_output_name_to_buffer_pool;
+    EventPtr m_shutdown_event;
+    uint32_t m_vdevice_handle;
+    std::mutex m_mutex;
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_CNG_BUFFER_POOL_HPP_ */
diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp
index 482c93a7..da67ac08 100644
--- a/hailort/hailort_service/hailort_rpc_service.cpp
+++ b/hailort/hailort_service/hailort_rpc_service.cpp
@@ -16,20 +16,23 @@
 #include "common/os_utils.hpp"
 
 #include "hailort_rpc_service.hpp"
+#include "cng_buffer_pool.hpp"
 #include "rpc/rpc_definitions.hpp"
 #include "service_resource_manager.hpp"
-#include "net_flow/ops/op_metadata.hpp"
-#include "net_flow/ops/nms_post_process.hpp"
-#include "net_flow/ops/yolov8_post_process.hpp"
-#include "net_flow/ops/ssd_post_process.hpp"
-#include "net_flow/ops/yolox_post_process.hpp"
-#include "net_flow/ops/yolov5_op_metadata.hpp"
-#include "net_flow/ops/yolov5_seg_op_metadata.hpp"
+#include "net_flow/ops_metadata/op_metadata.hpp"
"net_flow/ops_metadata/op_metadata.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include "hef/layer_info.hpp" #include + +#define MAX_GRPC_BUFFER_SIZE (2ULL * 1024 * 1024 * 1024) // 2GB namespace hailort { @@ -97,7 +100,7 @@ void HailoRtRpcService::remove_disconnected_clients() auto now = std::chrono::high_resolution_clock::now(); std::set pids_to_remove; { - std::unique_lock lock(m_mutex); + std::unique_lock lock(m_keep_alive_mutex); for (auto pid_to_last_alive : m_clients_pids) { auto duration = std::chrono::duration_cast(now - pid_to_last_alive.second); if (duration > hailort::HAILO_KEEPALIVE_INTERVAL) { @@ -133,7 +136,7 @@ void HailoRtRpcService::keep_alive() void HailoRtRpcService::update_client_id_timestamp(uint32_t pid) { - std::unique_lock lock(m_mutex); + std::unique_lock lock(m_keep_alive_mutex); m_clients_pids[pid] = std::chrono::high_resolution_clock::now(); } @@ -186,7 +189,7 @@ grpc::Status HailoRtRpcService::VDevice_create(grpc::ServerContext *, const VDev CHECK_EXPECTED_AS_RPC_STATUS(vdevice, reply); update_client_id_timestamp(request->pid()); - std::unique_lock lock(m_vdevice_creation_mutex); + std::unique_lock lock(m_vdevice_mutex); auto &vdevice_manager = ServiceResourceManager::get_instance(); auto vdevice_handle = vdevice_manager.register_resource(request->pid(), std::move(vdevice.release())); @@ -271,6 +274,7 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD } update_client_id_timestamp(request->pid()); + std::unique_lock lock(m_vdevice_mutex); auto lambda = [](std::shared_ptr vdevice, Hef &hef, NetworkGroupsParamsMap &configure_params_map) { return vdevice->configure(hef, configure_params_map); }; @@ -281,14 +285,57 @@ grpc::Status HailoRtRpcService::VDevice_configure(grpc::ServerContext*, const VD auto &networks_manager = ServiceResourceManager::get_instance(); for (auto network : networks.value()) { - auto handle = networks_manager.register_resource(request->pid(), network); - reply->add_networks_handles(handle); + auto ng_handle = networks_manager.register_resource(request->pid(), network); + reply->add_networks_handles(ng_handle); + + bool allocate_for_raw_streams = false; + // The network_group's buffer pool is used for the read's buffers, + // On async flow - we allocate for raw-streams. 
+hailo_status HailoRtRpcService::create_buffer_pools_for_ng(uint32_t vdevice_handle, uint32_t ng_handle, uint32_t request_pid,
+    bool allocate_for_raw_streams)
+{
+    auto cng_buffer_pool = ServiceNetworkGroupBufferPool::create(vdevice_handle);
+    CHECK_EXPECTED_AS_STATUS(cng_buffer_pool);
+
+    auto &cng_buffer_pool_manager = ServiceResourceManager<ServiceNetworkGroupBufferPool>::get_instance();
+    auto cng_buffer_pool_handle = cng_buffer_pool_manager.register_resource(request_pid, cng_buffer_pool.release());
+    CHECK(cng_buffer_pool_handle == ng_handle, HAILO_INTERNAL_FAILURE,
+        "cng_buffer_pool_handle = {} must be equal to network_group_handle = {}", cng_buffer_pool_handle, ng_handle);
+
+    if (allocate_for_raw_streams) {
+        // For the async API - the buffer size in the pool will be the stream's hw frame size, as used in the infer_model pipeline
+        auto min_buffer_pool_size = get_min_buffer_pool_size(ng_handle);
+        CHECK_EXPECTED_AS_STATUS(min_buffer_pool_size);
+
+        auto streams_infos = get_all_stream_infos(ng_handle);
+        CHECK_EXPECTED_AS_STATUS(streams_infos);
+
+        for (const auto &stream_info : streams_infos.value()) {
+            if (stream_info.direction == HAILO_D2H_STREAM) {
+                auto allocate_lambda = [&](std::shared_ptr<ServiceNetworkGroupBufferPool> cng_buffer_pool) {
+                    return cng_buffer_pool->allocate_pool(stream_info.name, stream_info.hw_frame_size, min_buffer_pool_size.value());
+                };
+                CHECK_SUCCESS(cng_buffer_pool_manager.execute<hailo_status>(ng_handle, allocate_lambda));
+            }
+        }
+    }
+
+    return HAILO_SUCCESS;
+}
+
 grpc::Status HailoRtRpcService::VDevice_get_physical_devices_ids(grpc::ServerContext*,
     const VDevice_get_physical_devices_ids_Request* request, VDevice_get_physical_devices_ids_Reply* reply)
 {
@@ -370,96 +417,192 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_dup_handle(grpc::ServerCo
     return grpc::Status::OK;
 }
 
+ProtoCallbackIdentifier serialize_callback_identifier(uint32_t vdevice_handle, uint32_t ng_handle,
+    callback_type_t cb_type, const std::string &stream_name, uint32_t cb_idx, hailo_status status, BufferPtr buffer = nullptr)
+{
+    ProtoCallbackIdentifier cb_identifier;
+    cb_identifier.set_vdevice_handle(vdevice_handle);
+    cb_identifier.set_network_group_handle(ng_handle);
+    cb_identifier.set_cb_type(cb_type);
+    cb_identifier.set_stream_name(stream_name);
+    cb_identifier.set_cb_idx(cb_idx);
+    cb_identifier.set_status(status);
+    if (buffer != nullptr) {
+        cb_identifier.set_data(buffer->data(), buffer->size());
+    }
+
+    return cb_identifier;
+}
+
+ buffer_pool_manager.release_resource(request->network_group_identifier().network_group_handle(), request->pid());
+
 auto &manager = ServiceResourceManager::get_instance();
 manager.release_resource(request->network_group_identifier().network_group_handle(), request->pid());
 reply->set_status(static_cast(HAILO_SUCCESS));
 return grpc::Status::OK;
 }
 
-grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_async(grpc::ServerContext*,
- const ConfiguredNetworkGroup_infer_async_Request *request, ConfiguredNetworkGroup_infer_async_Reply *reply)
+hailo_status HailoRtRpcService::add_input_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request,
+ uint32_t vdevice_handle, uint32_t ng_handle, std::shared_ptr infer_async_request,
+ NamedBuffersCallbacks &named_buffers_callbacks)
 {
- auto vdevice_handle = request->identifier().vdevice_handle();
- auto ng_handle = request->identifier().network_group_handle();
- auto infer_request_done_cb_idx = request->infer_request_done_cb_idx();
+ // Prepare input buffer
+ BufferPtr buffer;
+ MemoryView mem_view;
+ auto *data = reinterpret_cast(proto_stream_transfer_request.data().c_str());
+ if (reinterpret_cast(data) % HailoRTCommon::HW_DATA_ALIGNMENT == 0) {
+ // Input buffer is aligned to 8
+ mem_view = MemoryView::create_const(data, proto_stream_transfer_request.data().size());
+ } else {
+ // The memory is not aligned to 8, therefore we need to copy the data into a buffer
+ auto buffer_exp = Buffer::create_shared(data, proto_stream_transfer_request.data().size(),
+ BufferStorageParams::create_dma());
+ CHECK_EXPECTED(buffer_exp);
+ buffer = buffer_exp.release();
+ mem_view = MemoryView(*buffer);
+ }
+
+ // Prepare callback
+ auto &stream_name = proto_stream_transfer_request.stream_name();
+ auto cb_idx = proto_stream_transfer_request.cb_idx();
+ std::function transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer, infer_async_request]
+ (hailo_status status)
+ {
+ // We pass the request (which is shared_ptr) to the callback in order to keep the input's memory alive until inference is done.
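+ // The captures themselves provide the keep-alive; the (void) casts below only mark the
+ // captured objects as intentionally unused inside the callback body.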
+ (void)infer_async_request; + (void)buffer; + + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER, + stream_name, cb_idx, status); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + named_buffers_callbacks.emplace(stream_name, std::make_pair(mem_view, transfer_done)); + return HAILO_SUCCESS; +} + +hailo_status HailoRtRpcService::add_output_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, NamedBuffersCallbacks &named_buffers_callbacks) +{ + // Prepare output buffer + auto &stream_name = proto_stream_transfer_request.stream_name(); + auto buffer_exp = acquire_buffer_from_cng_pool(ng_handle, stream_name); + CHECK_EXPECTED(buffer_exp); + auto buffer = buffer_exp.release(); + + // Prepare callback + auto cb_idx = proto_stream_transfer_request.cb_idx(); + std::function transfer_done = [this, vdevice_handle, ng_handle, cb_idx, stream_name, buffer] + (hailo_status status) + { + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_TRANSFER, + stream_name, cb_idx, status, buffer); + return_buffer_to_cng_pool(ng_handle, stream_name, buffer); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + named_buffers_callbacks.emplace(stream_name, std::make_pair(MemoryView(*buffer), transfer_done)); + return HAILO_SUCCESS; +} +Expected HailoRtRpcService::prepare_named_buffers_callbacks(uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request) +{ NamedBuffersCallbacks named_buffers_callbacks; - for (const auto &proto_transfer_request : request->transfer_requests()) { - auto &stream_name = proto_transfer_request.stream_name(); - auto direction = proto_transfer_request.direction(); - auto cb_idx = proto_transfer_request.cb_idx(); - BufferPtr buffer; + for (const auto &proto_stream_transfer_request : infer_async_request->transfer_requests()) { + auto direction = proto_stream_transfer_request.direction(); + auto status = HAILO_SUCCESS; if (direction == HAILO_H2D_STREAM) { - // TODO: Remove memcpy after HRT-12238 - auto buffer_exp = Buffer::create_shared(reinterpret_cast(proto_transfer_request.data().c_str()), - proto_transfer_request.size(), BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); - buffer = buffer_exp.release(); + status = add_input_named_buffer(proto_stream_transfer_request, vdevice_handle, ng_handle, infer_async_request, named_buffers_callbacks); } else { - // TODO: HRT-12360 - Use buffer pool for the service reads - auto buffer_exp = Buffer::create_shared(proto_transfer_request.size(), BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); - buffer = buffer_exp.release(); + status = add_output_named_buffer(proto_stream_transfer_request, vdevice_handle, ng_handle, named_buffers_callbacks); } + CHECK_SUCCESS_AS_EXPECTED(status); + } - std::function transfer_done = [vdevice_handle, ng_handle, cb_idx, stream_name, direction, buffer] - (hailo_status status) - { - ProtoCallbackIdentifier cb_identifier; - cb_identifier.set_vdevice_handle(vdevice_handle); - cb_identifier.set_network_group_handle(ng_handle); - cb_identifier.set_cb_type(CALLBACK_TYPE_TRANSFER); - cb_identifier.set_stream_name(stream_name); - cb_identifier.set_cb_idx(cb_idx); - cb_identifier.set_status(status); - - auto lambda = [direction](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier, BufferPtr buffer) { - if (direction == 
HAILO_D2H_STREAM) { - cb_identifier.set_data(buffer->data(), buffer->size()); - } - return cb_queue->enqueue(std::move(cb_identifier)); - }; + return named_buffers_callbacks; +} - auto &cb_queue_manager = ServiceResourceManager::get_instance(); - auto exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier), buffer); - if (exc_status != HAILO_SUCCESS) { - LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); - } - }; - named_buffers_callbacks.emplace(stream_name, std::make_pair(MemoryView(*buffer), transfer_done)); +void HailoRtRpcService::enqueue_cb_identifier(uint32_t vdevice_handle, ProtoCallbackIdentifier &&cb_identifier) +{ + auto lambda = [](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier) { + return cb_queue->enqueue(std::move(cb_identifier)); + }; + + auto &cb_queue_manager = ServiceResourceManager::get_instance(); + auto status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier)); + if (status != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); } +} - auto infer_request_done_cb = [vdevice_handle, ng_handle, infer_request_done_cb_idx](hailo_status status){ - ProtoCallbackIdentifier cb_identifier; - cb_identifier.set_vdevice_handle(vdevice_handle); - cb_identifier.set_network_group_handle(ng_handle); - cb_identifier.set_cb_type(CALLBACK_TYPE_INFER_REQUEST); - cb_identifier.set_cb_idx(infer_request_done_cb_idx); +hailo_status HailoRtRpcService::return_buffer_to_cng_pool(uint32_t ng_handle, const std::string &output_name, BufferPtr buffer) +{ + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto lambda_return_to_pool = [](std::shared_ptr cng_buffer_pool, + const std::string &stream_name, BufferPtr buffer) { + return cng_buffer_pool->return_to_pool(stream_name, buffer); + }; + auto status = cng_buffer_pool_manager.execute(ng_handle, lambda_return_to_pool, + output_name, buffer); + CHECK_SUCCESS(status); - auto lambda = [](std::shared_ptr cb_queue, ProtoCallbackIdentifier &cb_identifier) { - return cb_queue->enqueue(std::move(cb_identifier)); - }; + return HAILO_SUCCESS; +} - auto &cb_queue_manager = ServiceResourceManager::get_instance(); - auto exc_status = cb_queue_manager.execute(vdevice_handle, lambda, std::move(cb_identifier)); - if (exc_status != HAILO_SUCCESS) { - LOGGER__ERROR("Failed to enqueue callback to VDeviceCallbacksQueue with status={}", status); - } +Expected HailoRtRpcService::acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name) +{ + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto lambda_acquire_buffer = [](std::shared_ptr cng_buffer_pool, const std::string &output_name) { + return cng_buffer_pool->acquire_buffer(output_name); }; + auto buffer = cng_buffer_pool_manager.execute>(ng_handle, lambda_acquire_buffer, output_name); + CHECK_EXPECTED(buffer); + return buffer.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_async(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_async_Request *raw_request, ConfiguredNetworkGroup_infer_async_Reply *reply) +{ + // Moving ownership of the request, so we can use the request's memory as the input buffers instead of allocating new memory for it. 
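+ // Each input's transfer_done callback captures this shared_ptr as well (see add_input_named_buffer
+ // above), so the request's memory stays valid until all transfers have completed.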
+ auto request = make_shared_nothrow(std::move(*raw_request)); + auto vdevice_handle = request->identifier().vdevice_handle(); + auto ng_handle = request->identifier().network_group_handle(); + auto infer_request_done_cb_idx = request->infer_request_done_cb_idx(); + + // Prepare buffers + auto named_buffers_callbacks = prepare_named_buffers_callbacks(vdevice_handle, ng_handle, request); + CHECK_EXPECTED_AS_RPC_STATUS(named_buffers_callbacks, reply); + + // Prepare request finish callback + auto infer_request_done_cb = [this, vdevice_handle, ng_handle, infer_request_done_cb_idx](hailo_status status) { + auto cb_identifier = serialize_callback_identifier(vdevice_handle, ng_handle, CALLBACK_TYPE_INFER_REQUEST, + "", infer_request_done_cb_idx, status); + enqueue_cb_identifier(vdevice_handle, std::move(cb_identifier)); + }; + + // Run infer async auto lambda = [](std::shared_ptr cng, NamedBuffersCallbacks &named_buffers_callbacks, const std::function &infer_request_done_cb) { return cng->infer_async(named_buffers_callbacks, infer_request_done_cb); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().network_group_handle(), lambda, named_buffers_callbacks, infer_request_done_cb); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = manager.execute(request->identifier().network_group_handle(), lambda, named_buffers_callbacks.release(), infer_request_done_cb); + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted inference"); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply); @@ -629,7 +772,7 @@ void serialize_vstream_info(const hailo_vstream_info_t &info, ProtoVStreamInfo * auto nms_shape_proto = info_proto->mutable_nms_shape(); nms_shape_proto->set_number_of_classes(info.nms_shape.number_of_classes); nms_shape_proto->set_max_bbox_per_class(info.nms_shape.max_bboxes_per_class); - nms_shape_proto->set_max_mask_size(info.nms_shape.max_mask_size); + nms_shape_proto->set_max_accumulated_mask_size(info.nms_shape.max_accumulated_mask_size); } else { auto shape_proto = info_proto->mutable_shape(); shape_proto->set_height(info.shape.height); @@ -918,6 +1061,7 @@ void serialize_yolov5seg_op_metadata(hailort::net_flow::OpMetadata &op_metadata, yolov5seg_config_proto->set_mask_threshold(yolov5seg_config.mask_threshold); yolov5seg_config_proto->set_layer_name(yolov5seg_config.proto_layer_name); + yolov5seg_config_proto->set_max_accumulated_mask_size(yolov5seg_config.max_accumulated_mask_size); } void serialize_op_matadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMetadata *op_metadata_proto) @@ -1026,12 +1170,7 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_vstream_infos(grp const ConfiguredNetworkGroup_get_vstream_infos_Request *request, ConfiguredNetworkGroup_get_vstream_infos_Reply *reply) { - auto lambda = [](std::shared_ptr cng, std::string network_name) { - return cng->get_all_vstream_infos(network_name); - }; - auto &net_group_manager = ServiceResourceManager::get_instance(); - auto expected_vstream_infos = net_group_manager.execute>>( - request->identifier().network_group_handle(), lambda, request->network_name()); + auto expected_vstream_infos = get_all_vstream_infos(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(expected_vstream_infos, reply); serialize_vstream_infos(reply, expected_vstream_infos.value()); @@ -1170,7 +1309,6 
@@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons auto &net_group_manager = ServiceResourceManager::get_instance(); net_group_manager.dup_handle(network_group_handle, client_pid); - auto lambda = [](std::shared_ptr cng, const std::map &inputs_params) { return cng->create_input_vstreams(inputs_params); }; @@ -1178,13 +1316,12 @@ grpc::Status HailoRtRpcService::InputVStreams_create(grpc::ServerContext *, cons CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); + auto &vstreams_manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { - auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); + auto handle = vstreams_manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } - reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1237,13 +1374,19 @@ grpc::Status HailoRtRpcService::OutputVStreams_create(grpc::ServerContext *, con CHECK_EXPECTED_AS_RPC_STATUS(vstreams_expected, reply); auto vstreams = vstreams_expected.release(); - auto &manager = ServiceResourceManager::get_instance(); + // The network_group's buffer pool is used for the read's buffers. + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto &vstream_manager = ServiceResourceManager::get_instance(); for (size_t i = 0; i < vstreams.size(); i++) { - auto handle = manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); + auto allocate_lambda = [&](std::shared_ptr cng_buffer_pool) { + return cng_buffer_pool->allocate_pool(vstreams[i].name(), vstreams[i].get_frame_size(), output_params.at(vstreams[i].name()).queue_size); + }; + CHECK_SUCCESS_AS_RPC_STATUS(cng_buffer_pool_manager.execute(network_group_handle, allocate_lambda), reply); + + auto handle = vstream_manager.register_resource(client_pid, make_shared_nothrow(std::move(vstreams[i]))); reply->add_handles(handle); } - reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1292,16 +1435,17 @@ grpc::Status HailoRtRpcService::InputVStream_is_multi_planar(grpc::ServerContext grpc::Status HailoRtRpcService::InputVStream_write(grpc::ServerContext*, const InputVStream_write_Request *request, InputVStream_write_Reply *reply) { - std::vector data(request->data().begin(), request->data().end()); + MemoryView mem_view = MemoryView::create_const(reinterpret_cast(request->data().c_str()), + request->data().size()); auto lambda = [](std::shared_ptr input_vstream, const MemoryView &buffer) { return input_vstream->write(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView::create_const(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, mem_view); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream write."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream write failed"); @@ -1315,6 +1459,7 @@ grpc::Status HailoRtRpcService::InputVStream_write_pix(grpc::ServerContext*, con hailo_pix_buffer_t pix_buffer = {}; pix_buffer.index = 
request->index(); pix_buffer.number_of_planes = request->number_of_planes(); + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; // Service does not support other memory types std::vector> data_arrays; data_arrays.reserve(pix_buffer.number_of_planes); for (uint32_t i =0; i < pix_buffer.number_of_planes; i++) { @@ -1329,9 +1474,9 @@ grpc::Status HailoRtRpcService::InputVStream_write_pix(grpc::ServerContext*, con auto &manager = ServiceResourceManager::get_instance(); auto status = manager.execute(request->identifier().vstream_handle(), lambda, pix_buffer); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream write."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream write failed"); @@ -1362,34 +1507,71 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_network_infos(grpc::S grpc::Status HailoRtRpcService::OutputVStream_read(grpc::ServerContext*, const OutputVStream_read_Request *request, OutputVStream_read_Reply *reply) { - std::vector data(request->size()); + auto ng_handle = request->identifier().network_group_handle(); + auto vstream_name = output_vstream_name(request->identifier().vstream_handle()); + CHECK_EXPECTED_AS_RPC_STATUS(vstream_name, reply); + + auto buffer_exp = acquire_buffer_from_cng_pool(ng_handle, vstream_name.value()); + CHECK_EXPECTED_AS_RPC_STATUS(buffer_exp, reply); + auto buffer = buffer_exp.release(); + auto lambda = [](std::shared_ptr output_vstream, MemoryView &buffer) { return output_vstream->read(std::move(buffer)); }; auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(data.data(), data.size())); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, MemoryView(buffer->data(), buffer->size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("User aborted VStream read."); - reply->set_status(static_cast(HAILO_STREAM_ABORTED_BY_USER)); + reply->set_status(static_cast(HAILO_STREAM_ABORT)); return grpc::Status::OK; } CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "VStream read failed"); - reply->set_data(data.data(), data.size()); + + if (buffer->size() > MAX_GRPC_BUFFER_SIZE) { + LOGGER__ERROR("Response buffer size is too big: {}. 
Max response size is: {}", buffer->size(), MAX_GRPC_BUFFER_SIZE); + reply->set_status(static_cast(HAILO_RPC_FAILED)); + return grpc::Status::OK; + } + + reply->set_data(buffer->data(), buffer->size()); + + status = return_buffer_to_cng_pool(ng_handle, vstream_name.value(), buffer); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_all_stream_infos_Request *request, - ConfiguredNetworkGroup_get_all_stream_infos_Reply *reply) +Expected> HailoRtRpcService::get_all_stream_infos(uint32_t ng_handle) { auto lambda = [](std::shared_ptr cng) { return cng->get_all_stream_infos(); }; auto &manager = ServiceResourceManager::get_instance(); - auto expected_stream_infos = manager.execute>>(request->identifier().network_group_handle(), - lambda); + auto expected_stream_infos = manager.execute>>(ng_handle, lambda); + CHECK_EXPECTED(expected_stream_infos); + + return expected_stream_infos.release(); +} + +Expected> HailoRtRpcService::get_all_vstream_infos(uint32_t ng_handle) +{ + auto lambda = [](std::shared_ptr cng) { + return cng->get_all_vstream_infos(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto expected_vstream_infos = manager.execute>>(ng_handle, lambda); + CHECK_EXPECTED(expected_vstream_infos); + + return expected_vstream_infos.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_all_stream_infos_Request *request, + ConfiguredNetworkGroup_get_all_stream_infos_Reply *reply) +{ + auto expected_stream_infos = get_all_stream_infos(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(expected_stream_infos, reply); auto proto_stream_infos = reply->mutable_stream_infos(); @@ -1495,16 +1677,23 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g return grpc::Status::OK; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, - ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +Expected HailoRtRpcService::get_min_buffer_pool_size(uint32_t ng_handle) { auto lambda = [](std::shared_ptr cng) { return cng->get_min_buffer_pool_size(); }; auto &manager = ServiceResourceManager::get_instance(); - auto min_buffer_pool_size_expected = manager.execute>(request->identifier().network_group_handle(), - lambda); + auto min_buffer_pool_size = manager.execute>(ng_handle, lambda); + CHECK_EXPECTED(min_buffer_pool_size); + + return min_buffer_pool_size.release(); +} + +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, + ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +{ + auto min_buffer_pool_size_expected = get_min_buffer_pool_size(request->identifier().network_group_handle()); CHECK_EXPECTED_AS_RPC_STATUS(min_buffer_pool_size_expected, reply); reply->set_min_buffer_pool_size(static_cast(min_buffer_pool_size_expected.release())); @@ -1598,6 +1787,22 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_bboxes_per_cl return grpc::Status::OK; } +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const 
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) +{ + auto lambda = [](std::shared_ptr cng, const std::string &edge_name, uint32_t max_accumulated_mask_size) { + return cng->set_nms_max_accumulated_mask_size(edge_name, max_accumulated_mask_size); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().network_group_handle(), lambda, + request->edge_name(), request->max_accumulated_mask_size()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) @@ -1656,11 +1861,7 @@ grpc::Status HailoRtRpcService::InputVStream_get_frame_size(grpc::ServerContext* grpc::Status HailoRtRpcService::OutputVStream_get_frame_size(grpc::ServerContext*, const VStream_get_frame_size_Request *request, VStream_get_frame_size_Reply *reply) { - auto lambda = [](std::shared_ptr output_vstream) { - return output_vstream->get_frame_size(); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto frame_size = manager.execute>(request->identifier().vstream_handle(), lambda); + auto frame_size = output_vstream_frame_size(request->identifier().vstream_handle()); CHECK_EXPECTED_AS_RPC_STATUS(frame_size, reply); reply->set_frame_size(static_cast(frame_size.release())); @@ -1692,14 +1893,34 @@ grpc::Status HailoRtRpcService::InputVStream_name(grpc::ServerContext*, const VS return grpc::Status::OK; } -grpc::Status HailoRtRpcService::OutputVStream_name(grpc::ServerContext*, const VStream_name_Request *request, - VStream_name_Reply *reply) +Expected HailoRtRpcService::output_vstream_name(uint32_t vstream_handle) { auto lambda = [](std::shared_ptr output_vstream) { return output_vstream->name(); }; auto &manager = ServiceResourceManager::get_instance(); - auto name = manager.execute>(request->identifier().vstream_handle(), lambda); + auto name = manager.execute>(vstream_handle, lambda); + CHECK_EXPECTED(name); + + return name.release(); +} + +Expected HailoRtRpcService::output_vstream_frame_size(uint32_t vstream_handle) +{ + auto lambda = [](std::shared_ptr output_vstream) { + return output_vstream->get_frame_size(); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto frame_size = manager.execute>(vstream_handle, lambda); + CHECK_EXPECTED(frame_size); + + return frame_size.release(); +} + +grpc::Status HailoRtRpcService::OutputVStream_name(grpc::ServerContext*, const VStream_name_Request *request, + VStream_name_Reply *reply) +{ + auto name = output_vstream_name(request->identifier().vstream_handle()); CHECK_EXPECTED_AS_RPC_STATUS(name, reply); reply->set_name(name.release()); @@ -1971,6 +2192,23 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_iou_threshold(grpc::Server return grpc::Status::OK; } +hailo_status HailoRtRpcService::update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle) +{ + auto vstream_name = output_vstream_name(vstream_handle); + CHECK_EXPECTED(vstream_name); + + auto frame_size = output_vstream_frame_size(vstream_handle); + CHECK_EXPECTED(frame_size); + + auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); + auto allocate_lambda = 
[&](std::shared_ptr cng_buffer_pool) { + return cng_buffer_pool->reallocate_pool(vstream_name.release(), frame_size.release()); + }; + CHECK_SUCCESS(cng_buffer_pool_manager.execute(network_group_handle, allocate_lambda)); + + return HAILO_SUCCESS; +} + grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_proposals_per_class(grpc::ServerContext*, const VStream_set_nms_max_proposals_per_class_Request *request, VStream_set_nms_max_proposals_per_class_Reply *reply) { @@ -1981,6 +2219,26 @@ grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_proposals_per_class(gr auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->max_proposals_per_class())); CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_max_proposals_per_class failed"); + status = update_buffer_size_in_pool(request->identifier().vstream_handle(), request->identifier().network_group_handle()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "Updating buffer size in pool failed"); + + reply->set_status(static_cast(HAILO_SUCCESS)); + return grpc::Status::OK; +} + +grpc::Status HailoRtRpcService::OutputVStream_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const VStream_set_nms_max_accumulated_mask_size_Request *request, VStream_set_nms_max_accumulated_mask_size_Reply *reply) +{ + auto lambda = [](std::shared_ptr output_vstream, uint32_t max_accumulated_mask_size) { + return output_vstream->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); + }; + auto &manager = ServiceResourceManager::get_instance(); + auto status = manager.execute(request->identifier().vstream_handle(), lambda, static_cast(request->max_accumulated_mask_size())); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "set_nms_max_accumulated_mask_size failed"); + + status = update_buffer_size_in_pool(request->identifier().vstream_handle(), request->identifier().network_group_handle()); + CHECK_SUCCESS_AS_RPC_STATUS(status, reply, "Updating buffer size in pool failed"); + reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp index 0531e533..5e022cc3 100644 --- a/hailort/hailort_service/hailort_rpc_service.hpp +++ b/hailort/hailort_service/hailort_rpc_service.hpp @@ -123,6 +123,8 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { const VStream_set_nms_iou_threshold_Request *request, VStream_set_nms_iou_threshold_Reply*) override; virtual grpc::Status OutputVStream_set_nms_max_proposals_per_class(grpc::ServerContext *ctx, const VStream_set_nms_max_proposals_per_class_Request *request, VStream_set_nms_max_proposals_per_class_Reply*) override; + virtual grpc::Status OutputVStream_set_nms_max_accumulated_mask_size(grpc::ServerContext *ctx, + const VStream_set_nms_max_accumulated_mask_size_Request *request, VStream_set_nms_max_accumulated_mask_size_Reply*) override; virtual grpc::Status ConfiguredNetworkGroup_dup_handle(grpc::ServerContext *ctx, const ConfiguredNetworkGroup_dup_handle_Request *request, ConfiguredNetworkGroup_dup_handle_Reply*) override; @@ -206,6 +208,9 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(grpc::ServerContext*, const ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request *request, ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply *reply) override; + virtual grpc::Status 
ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_stream_names_from_vstream_name(grpc::ServerContext*, const ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request *request, ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply *reply) override; @@ -224,12 +229,30 @@ class HailoRtRpcService final : public ProtoHailoRtRpc::Service { void abort_vstreams_by_pids(std::set &pids); void remove_disconnected_clients(); void update_client_id_timestamp(uint32_t pid); + Expected get_min_buffer_pool_size(uint32_t ng_handle); + Expected> get_all_stream_infos(uint32_t ng_handle); + Expected> get_all_vstream_infos(uint32_t ng_handle); + Expected output_vstream_name(uint32_t vstream_handle); + hailo_status create_buffer_pools_for_ng(uint32_t vdevice_handle, uint32_t ng_handle, uint32_t request_pid, + bool allocate_for_raw_streams); + Expected prepare_named_buffers_callbacks(uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request); + hailo_status add_input_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, std::shared_ptr infer_async_request, + NamedBuffersCallbacks &named_buffers_callbacks); + hailo_status add_output_named_buffer(const ProtoTransferRequest &proto_stream_transfer_request, uint32_t vdevice_handle, + uint32_t ng_handle, NamedBuffersCallbacks &named_buffers_callbacks); + void enqueue_cb_identifier(uint32_t vdevice_handle, ProtoCallbackIdentifier &&cb_identifier); + hailo_status return_buffer_to_cng_pool(uint32_t ng_handle, const std::string &output_name, BufferPtr buffer); + Expected acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name); + Expected output_vstream_frame_size(uint32_t vstream_handle); + hailo_status update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle); - std::mutex m_mutex; + std::mutex m_keep_alive_mutex; std::map> m_clients_pids; std::unique_ptr m_keep_alive; - std::mutex m_vdevice_creation_mutex; + std::mutex m_vdevice_mutex; }; } diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt index 4f05b5da..8b180dad 100644 --- a/hailort/hailortcli/CMakeLists.txt +++ b/hailort/hailortcli/CMakeLists.txt @@ -72,6 +72,7 @@ target_link_libraries(hailortcli nlohmann_json spdlog::spdlog readerwriterqueue + eigen DotWriter scheduler_mon_proto profiler_proto) diff --git a/hailort/hailortcli/benchmark_command.cpp b/hailort/hailortcli/benchmark_command.cpp index 7c3d1c18..57fa385f 100644 --- a/hailort/hailortcli/benchmark_command.cpp +++ b/hailort/hailortcli/benchmark_command.cpp @@ -98,10 +98,10 @@ hailo_status BenchmarkCommand::execute() std::cout << "FPS (hw_only) = " << hw_only_res.fps().value() <overall_latency()) { - std::cout << " (overall) = " << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; + std::cout << " (overall) = " << InferStatsPrinter::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; } } if (!m_not_measure_power) { diff --git a/hailort/hailortcli/download_action_list_command.cpp b/hailort/hailortcli/download_action_list_command.cpp index fdd978e7..7d57108f 100644 --- a/hailort/hailortcli/download_action_list_command.cpp +++ 
b/hailort/hailortcli/download_action_list_command.cpp @@ -411,7 +411,7 @@ Expected DownloadActionListCommand::parse_single_action(uint32_t b } Expected DownloadActionListCommand::parse_context(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, const std::string &context_name) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, const std::string &context_name) { uint8_t converted_context_type = static_cast(context_type); uint32_t action_list_base_address = 0; @@ -521,7 +521,7 @@ Expected DownloadActionListCommand::parse_network_group(Device &de network_group_json["contexts"].emplace_back(preliminary_context_json.release()); const auto dynamic_contexts_count = number_of_dynamic_contexts_per_network_group.value()[network_group_id]; - for (uint8_t context_index = 0; context_index < dynamic_contexts_count; context_index++) { + for (uint16_t context_index = 0; context_index < dynamic_contexts_count; context_index++) { auto context_json = parse_context(device, network_group_id, CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC, context_index, fmt::format("dynamic_{}", context_index)); diff --git a/hailort/hailortcli/download_action_list_command.hpp b/hailort/hailortcli/download_action_list_command.hpp index ede0ac65..0cbb8851 100644 --- a/hailort/hailortcli/download_action_list_command.hpp +++ b/hailort/hailortcli/download_action_list_command.hpp @@ -61,7 +61,7 @@ class DownloadActionListCommand : public DeviceCommand uint32_t current_buffer_offset, uint32_t *action_length, bool *is_repeated, uint8_t *num_repeated, CONTEXT_SWITCH_DEFS__ACTION_TYPE_t *sub_action_type, uint32_t *time_stamp); static Expected parse_context(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, const std::string &context_name); static double get_accumulator_mean_value(const AccumulatorPtr &accumulator, double default_value = INVALID_NUMERIC_VALUE); static Expected parse_network_groups(Device &device, const ConfiguredNetworkGroupVector &network_groups); diff --git a/hailort/hailortcli/graph_printer.cpp b/hailort/hailortcli/graph_printer.cpp index f58fa9ba..2975886a 100644 --- a/hailort/hailortcli/graph_printer.cpp +++ b/hailort/hailortcli/graph_printer.cpp @@ -119,16 +119,16 @@ DotWriter::HtmlString PipelineGraphNode::format_runtime_stats(const std::vector< // We split the statistics into two lines std::stringstream string_stream; string_stream << "" << accumulator->get_data_type() << ": "; - string_stream << "mean=" << InferResultsFormatUtils::format_statistic(accumulator_result.mean()) << ", "; - string_stream << "min=" << InferResultsFormatUtils::format_statistic(accumulator_result.min()) << ", "; - string_stream << "max=" << InferResultsFormatUtils::format_statistic(accumulator_result.max()) << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean(), "mean") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.min(), "min") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.max(), "max") << ", "; lines.emplace_back(string_stream.str()); // Clear the stream and format the next line string_stream.str(""); - string_stream << "var=" << InferResultsFormatUtils::format_statistic(accumulator_result.var()) << ", "; - string_stream << 
"sd=" << InferResultsFormatUtils::format_statistic(accumulator_result.sd()) << ", "; - string_stream << "mean_sd=" << InferResultsFormatUtils::format_statistic(accumulator_result.mean_sd()); + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.var(), "var") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.sd(), "sd") << ", "; + string_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean_sd(), "mean_sd"); lines.emplace_back(string_stream.str()); } diff --git a/hailort/hailortcli/infer_stats_printer.cpp b/hailort/hailortcli/infer_stats_printer.cpp index 2e062578..546c8cf4 100644 --- a/hailort/hailortcli/infer_stats_printer.cpp +++ b/hailort/hailortcli/infer_stats_printer.cpp @@ -30,27 +30,7 @@ static std::string infer_mode_to_string(InferMode infer_mode) } } -std::string InferResultsFormatUtils::format_statistic(const Expected &statistic, uint32_t precision) -{ - if (!statistic.has_value()) { - return "-"; - } - - std::stringstream string_stream; - string_stream << std::fixed << std::setprecision(precision) << statistic.value(); - return string_stream.str(); -} - -std::string InferResultsFormatUtils::format_statistic(const Expected &statistic) -{ - if (!statistic.has_value()) { - return "-"; - } - - return std::to_string(statistic.value()); -} - -double InferResultsFormatUtils::latency_result_to_ms(std::chrono::nanoseconds latency) +double InferStatsPrinter::latency_result_to_ms(std::chrono::nanoseconds latency) { return std::chrono::duration_cast>(latency).count(); } @@ -172,12 +152,12 @@ void InferStatsPrinter::print_csv(const std::vector &network_groups m_results_csv_file << ","; if (auto hw_latency = results.hw_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(hw_latency.value()); } m_results_csv_file << ","; if (auto overall_latency = results.overall_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(overall_latency.value()); } // TODO HRT-5363 support multiple devices (Currently assumes 1 device in the map) @@ -327,12 +307,12 @@ void InferStatsPrinter::print_benchmark_csv(InferResult &hw_inference_result, m_results_csv_file << ","; if (auto hw_latency = latency_res->hw_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(hw_latency.value()); } m_results_csv_file << ","; if (auto overall_latency = latency_res->overall_latency()) { - m_results_csv_file << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()); + m_results_csv_file << InferStatsPrinter::latency_result_to_ms(overall_latency.value()); } // TODO HRT-5363 support multiple devices (Currently assumes 1 device in the map) @@ -378,11 +358,11 @@ void InferStatsPrinter::print_stdout_single_element(const T &results, size_t fra } if (auto hw_latency = results.hw_latency()) { - std::cout << " HW Latency: " << InferResultsFormatUtils::latency_result_to_ms(hw_latency.value()) << " ms" << std::endl; + std::cout << " HW Latency: " << InferStatsPrinter::latency_result_to_ms(hw_latency.value()) << " ms" << std::endl; } if (auto overall_latency = results.overall_latency()) { - std::cout << " Overall Latency: " << InferResultsFormatUtils::latency_result_to_ms(overall_latency.value()) << 
" ms" << std::endl; + std::cout << " Overall Latency: " << InferStatsPrinter::latency_result_to_ms(overall_latency.value()) << " ms" << std::endl; } } @@ -489,12 +469,12 @@ void InferStatsPrinter::write_accumulator_results(std::ofstream &output_stream, output_stream << vstream_name << ","; output_stream << accumulator->get_data_type() << ","; output_stream << elem_name << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.mean()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.min()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.max()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.var()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.sd()) << ","; - output_stream << InferResultsFormatUtils::format_statistic(accumulator_result.mean_sd()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.min()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.max()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.var()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.sd()) << ","; + output_stream << AccumulatorResultsHelper::format_statistic(accumulator_result.mean_sd()) << ","; if (NO_INDEX != index) { output_stream << index; } diff --git a/hailort/hailortcli/infer_stats_printer.hpp b/hailort/hailortcli/infer_stats_printer.hpp index ac0e9503..0d28c4eb 100644 --- a/hailort/hailortcli/infer_stats_printer.hpp +++ b/hailort/hailortcli/infer_stats_printer.hpp @@ -15,19 +15,9 @@ #include -class InferResultsFormatUtils final { -public: - InferResultsFormatUtils() = delete; - - static const uint32_t DEFAULT_FLOATING_POINT_PRECISION = 4; - - static std::string format_statistic(const Expected &statistic, uint32_t precision = DEFAULT_FLOATING_POINT_PRECISION); - static std::string format_statistic(const Expected &statistic); - static double latency_result_to_ms(std::chrono::nanoseconds latency); -}; - class InferStatsPrinter final { public: + static double latency_result_to_ms(std::chrono::nanoseconds latency); static Expected create(const inference_runner_params ¶ms, bool print_running_info = true); void print(const std::vector &network_groups_names, Expected &inference_result); void print_benchmark_csv(InferResult &hw_inference_result, diff --git a/hailort/hailortcli/inference_progress.cpp b/hailort/hailortcli/inference_progress.cpp index ee7d6d05..f514ee0b 100644 --- a/hailort/hailortcli/inference_progress.cpp +++ b/hailort/hailortcli/inference_progress.cpp @@ -142,7 +142,7 @@ std::string NetworkProgressBar::get_progress_text() double avg_hw_latency = 0; auto latency_expected = m_configured_network_group->get_latency_measurement(m_network_name); if (latency_expected) { - avg_hw_latency = InferResultsFormatUtils::latency_result_to_ms(latency_expected.release().avg_hw_latency); + avg_hw_latency = InferStatsPrinter::latency_result_to_ms(latency_expected.release().avg_hw_latency); } if (avg_hw_latency > 0) { diff --git a/hailort/hailortcli/run2/io_wrappers.hpp b/hailort/hailortcli/run2/io_wrappers.hpp index 7a0f1da8..f3337ab2 100644 --- a/hailort/hailortcli/run2/io_wrappers.hpp +++ b/hailort/hailortcli/run2/io_wrappers.hpp @@ -16,12 +16,15 @@ #include 
"common/file_utils.hpp" #include "common/latency_meter.hpp" +#include "hailo/dma_mapped_buffer.hpp" + #include #include using namespace hailort; constexpr uint32_t UNLIMITED_FRAMERATE = 0; +constexpr size_t AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API = 1; #ifndef HAILO_EMULATOR constexpr std::chrono::milliseconds HAILORTCLI_DEFAULT_TIMEOUT(HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); @@ -44,21 +47,27 @@ class FramerateThrottle final }; // Wrapper for InputStream or InputVStream objects. -// We use std::enable_from_this because on async api the callback is using `this`. We want to increase the reference -// count until the callback is over. +// We use std::enable_from_this because on async api, we want to increase the ref count of this object until the +// callback is called. It can happen since network_group->shutdown() may be called after this object is being +// destructed. template class WriterWrapper final : public std::enable_shared_from_this> { public: template static Expected> create(Writer &writer, const WriterParams ¶ms, - const LatencyMeterPtr &overall_latency_meter, uint32_t framerate) + VDevice &vdevice, const LatencyMeterPtr &overall_latency_meter, uint32_t framerate, bool async_api) { - auto dataset = create_dataset(writer, params); - CHECK_EXPECTED(dataset); + TRY(auto dataset, create_dataset(writer, params)); + + std::vector dataset_mapped_buffers; + if (async_api) { + TRY(dataset_mapped_buffers, dma_map_dataset(dataset, vdevice)); + } std::shared_ptr wrapper( - new (std::nothrow) WriterWrapper(writer, dataset.release(), overall_latency_meter, framerate)); + new (std::nothrow) WriterWrapper(writer, std::move(dataset), std::move(dataset_mapped_buffers), + overall_latency_meter, framerate)); CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY); return wrapper; @@ -102,10 +111,11 @@ class WriterWrapper final : public std::enable_shared_from_this &&dataset, const LatencyMeterPtr &overall_latency_meter, - uint32_t framerate) : + WriterWrapper(Writer &writer, std::vector &&dataset, std::vector &&dataset_mapped_buffers, + const LatencyMeterPtr &overall_latency_meter, uint32_t framerate) : m_writer(std::ref(writer)), m_dataset(std::move(dataset)), + m_dataset_mapped_buffers(std::move(dataset_mapped_buffers)), m_overall_latency_meter(overall_latency_meter), m_framerate_throttle(framerate) {} @@ -142,6 +152,7 @@ class WriterWrapper final : public std::enable_shared_from_this> create_constant_dataset(size_t frame_size) { const uint8_t const_byte = 0xAB; + auto constant_buffer = Buffer::create_shared(frame_size, const_byte, BufferStorageParams::create_dma()); CHECK_EXPECTED(constant_buffer); @@ -169,9 +180,20 @@ class WriterWrapper final : public std::enable_shared_from_this> dma_map_dataset(const std::vector &dataset, VDevice &vdevice) { + std::vector dataset_mapped_buffers; + for (const auto &buffer : dataset) { + auto mapped_buffer = DmaMappedBuffer::create(vdevice, buffer->data(), buffer->size(), HAILO_DMA_BUFFER_DIRECTION_H2D); + CHECK_EXPECTED(mapped_buffer); + dataset_mapped_buffers.emplace_back(mapped_buffer.release()); + } + return dataset_mapped_buffers; + } + std::reference_wrapper m_writer; std::vector m_dataset; + std::vector m_dataset_mapped_buffers; size_t m_current_buffer_index = 0; LatencyMeterPtr m_overall_latency_meter; @@ -182,20 +204,51 @@ template using WriterWrapperPtr = std::shared_ptr>; // Wrapper for OutputStream or OutputVStream objects. -// We use std::enable_from_this because on async api the callback is using `this`. 
 // Wrapper for OutputStream or OutputVStream objects.
-// We use std::enable_from_this because on async api the callback is using `this`. We want to increase the reference
-count until the callback is over.
+// We use std::enable_shared_from_this because on the async API we want to increase the ref count of this object until
+// the callback is called. This can happen since network_group->shutdown() may be called after this object would
+// otherwise have been destructed.
 template
 class ReaderWrapper final : public std::enable_shared_from_this>
 {
 public:
- static Expected> create(Reader &reader, const LatencyMeterPtr &overall_latency_meter,
- std::shared_ptr net_live_track)
+
+ // Function that gets the amount of output buffers needed for the stream. Templated for both possible types that
+ // ReaderWrapper can wrap - OutputStream and OutputVStream
+
+ // In async mode we create an amount of output buffers equal to async_max_queue_size - we do this because we want
+ // each in-flight read to have its own buffer. (Otherwise it can cause bugs in NMS async mode.)
+ static Expected get_amount_of_output_buffers(OutputStream &output_stream, bool async_api)
 {
- auto buffer = Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma());
- CHECK_EXPECTED(buffer);
+ if (async_api) {
+ return output_stream.get_async_max_queue_size();
+ } else {
+ return static_cast(AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API);
+ }
+ }
+
+ // VStreams will always be sync, hence 1 output buffer is enough.
+ static Expected get_amount_of_output_buffers(OutputVStream &output_vstream, bool async_api)
+ {
+ (void) output_vstream;
+ (void) async_api;
+ return static_cast(AMOUNT_OF_OUTPUT_BUFFERS_SYNC_API);
+ }
+
+ static Expected> create(Reader &reader, VDevice &vdevice,
+ const LatencyMeterPtr &overall_latency_meter, std::shared_ptr net_live_track, bool async_api)
+ {
+ TRY(const auto amount_of_output_buffers, get_amount_of_output_buffers(reader, async_api));
+
+ TRY(auto output_buffers, create_output_buffers(reader, amount_of_output_buffers));
+
+ std::vector dma_mapped_buffers;
+ if (async_api) {
+ TRY(dma_mapped_buffers, dma_map_output_buffers(vdevice, amount_of_output_buffers, output_buffers));
+ }
 
 std::shared_ptr wrapper(
- new (std::nothrow) ReaderWrapper(reader, buffer.release(), overall_latency_meter, net_live_track));
+ new (std::nothrow) ReaderWrapper(reader, std::move(output_buffers), std::move(dma_mapped_buffers),
+ overall_latency_meter, net_live_track));
 CHECK_NOT_NULL_AS_EXPECTED(wrapper, HAILO_OUT_OF_HOST_MEMORY);
 
 return wrapper;
@@ -206,7 +259,7 @@ class ReaderWrapper final : public std::enable_shared_from_this
- return get().wait_for_async_ready(m_buffer->size(), HAILORTCLI_DEFAULT_TIMEOUT);
+ return get().wait_for_async_ready(m_buffer[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT);
 }
 
 hailo_status read_async(typename Reader::TransferDoneCallback callback)
 {
 auto self = std::enable_shared_from_this>::shared_from_this();
- return get().read_async(MemoryView(*m_buffer),
+ return get().read_async(MemoryView(*next_buffer()),
 [self, original=callback](const typename Reader::CompletionInfo &completion_info) {
 original(completion_info);
 if (completion_info.status == HAILO_SUCCESS) {
@@ -233,10 +286,11 @@ class ReaderWrapper final : public std::enable_shared_from_this
- ReaderWrapper(Reader &reader, BufferPtr &&buffer, const LatencyMeterPtr &overall_latency_meter,
- std::shared_ptr net_live_track) :
+ ReaderWrapper(Reader &reader, std::vector &&buffer, std::vector &&mapped_buffer_ptr,
+ const LatencyMeterPtr &overall_latency_meter, std::shared_ptr net_live_track) :
 m_reader(std::ref(reader)),
 m_buffer(std::move(buffer)),
+ m_mapped_buffer_ptr(std::move(mapped_buffer_ptr)),
 m_overall_latency_meter(overall_latency_meter),
 m_net_live_track(net_live_track)
 {}
 
@@ -253,9 +307,51 @@ class ReaderWrapper final : public
std::enable_shared_from_this
 std::reference_wrapper m_reader;
- BufferPtr m_buffer;
+ std::vector m_buffer;
+ std::vector m_mapped_buffer_ptr;
 LatencyMeterPtr m_overall_latency_meter;
 std::shared_ptr m_net_live_track;
+ size_t m_current_buffer_index = 0;
+
+ static Expected> create_output_buffers(Reader &reader, size_t amount_of_output_buffers)
+ {
+ std::vector output_buffers;
+ output_buffers.reserve(amount_of_output_buffers);
+
+ for (size_t i = 0; i < amount_of_output_buffers; i++) {
+ TRY(auto buffer, Buffer::create_shared(reader.get_frame_size(), BufferStorageParams::create_dma()));
+ output_buffers.emplace_back(std::move(buffer));
+ }
+
+ return output_buffers;
+ }
+
+ static Expected> dma_map_output_buffers(VDevice &vdevice, size_t amount_of_output_buffers,
+ const std::vector &output_buffers)
+ {
+ std::vector mapped_output_buffers;
+ mapped_output_buffers.reserve(amount_of_output_buffers);
+
+ for (const auto& output_buffer : output_buffers) {
+ TRY(auto mapped_buffer,
+ DmaMappedBuffer::create(vdevice, output_buffer->data(), output_buffer->size(), HAILO_DMA_BUFFER_DIRECTION_D2H));
+ mapped_output_buffers.emplace_back(std::move(mapped_buffer));
+ }
+
+ return mapped_output_buffers;
+ }
+
+ size_t next_buffer_index()
+ {
+ const auto index = m_current_buffer_index;
+ m_current_buffer_index = (m_current_buffer_index + 1) % m_buffer.size();
+ return index;
+ }
+
+ BufferPtr next_buffer()
+ {
+ return m_buffer[next_buffer_index()];
+ }
 };
 
 template
diff --git a/hailort/hailortcli/run2/network_live_track.cpp b/hailort/hailortcli/run2/network_live_track.cpp
index 0033816c..c9d35ce1 100644
--- a/hailort/hailortcli/run2/network_live_track.cpp
+++ b/hailort/hailortcli/run2/network_live_track.cpp
@@ -75,7 +75,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 if (m_cng) {
 auto hw_latency_measurement = m_cng->get_latency_measurement();
 if (hw_latency_measurement) {
- ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
+ ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
 } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
 ss << fmt::format("{}hw latency: NaN (err)", get_separator());
 }
@@ -83,7 +83,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 else {
 auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement();
 if (hw_latency_measurement) {
- ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
+ ss << fmt::format("{}hw latency: {:.2f} ms", get_separator(), InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency));
 } else if (HAILO_NOT_AVAILABLE != hw_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it
 ss << fmt::format("{}hw latency: NaN (err)", get_separator());
@@ -93,7 +93,7 @@ uint32_t NetworkLiveTrack::push_text_impl(std::stringstream &ss)
 if (m_overall_latency_meter) {
 auto overall_latency_measurement = m_overall_latency_meter->get_latency(false);
 if (overall_latency_measurement) {
- ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(), InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement));
+ ss << fmt::format("{}overall latency: {:.2f} ms", get_separator(),
InferStatsPrinter::latency_result_to_ms(*overall_latency_measurement)); } else if (HAILO_NOT_AVAILABLE != overall_latency_measurement.status()) { // HAILO_NOT_AVAILABLE is a valid error, we ignore it ss << fmt::format("{}overall latency: NaN (err)", get_separator()); @@ -127,13 +127,13 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) if (m_cng) { auto hw_latency_measurement = m_cng->get_latency_measurement(); if (hw_latency_measurement){ - network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + network_group_json["hw_latency"] = InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); } } else { auto hw_latency_measurement = m_configured_infer_model->get_hw_latency_measurement(); if (hw_latency_measurement){ - network_group_json["hw_latency"] = InferResultsFormatUtils::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); + network_group_json["hw_latency"] = InferStatsPrinter::latency_result_to_ms(hw_latency_measurement->avg_hw_latency); } } @@ -141,7 +141,7 @@ void NetworkLiveTrack::push_json_impl(nlohmann::ordered_json &json) if (m_overall_latency_meter){ auto overall_latency_measurement = m_overall_latency_meter->get_latency(false); if (overall_latency_measurement){ - network_group_json["overall_latency"] = InferResultsFormatUtils::latency_result_to_ms(*overall_latency_measurement); + network_group_json["overall_latency"] = InferStatsPrinter::latency_result_to_ms(*overall_latency_measurement); } } json["network_groups"].emplace_back(network_group_json); diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp index 15a563a2..86845f73 100644 --- a/hailort/hailortcli/run2/network_runner.cpp +++ b/hailort/hailortcli/run2/network_runner.cpp @@ -254,7 +254,7 @@ Expected> NetworkRunner::create_shared(VDevice &v switch (final_net_params.mode) { - case InferenceMode::FULL: + case InferenceMode::FULL_SYNC: { std::map vstreams_params; for (auto &vstream_params : final_net_params.vstream_params) { @@ -263,13 +263,13 @@ Expected> NetworkRunner::create_shared(VDevice &v auto vstreams = create_vstreams(*cfgr_net_group, vstreams_params); CHECK_EXPECTED(vstreams); - auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, + auto net_runner = make_shared_nothrow(final_net_params, expected_net_group_name.value(), vdevice, std::move(vstreams->first), std::move(vstreams->second), cfgr_net_group); CHECK_NOT_NULL_AS_EXPECTED(net_runner, HAILO_OUT_OF_HOST_MEMORY); net_runner_ptr = std::static_pointer_cast(net_runner); break; } - case InferenceMode::RAW: // Fallthrough + case InferenceMode::RAW_SYNC: // Fallthrough case InferenceMode::RAW_ASYNC: // Fallthrough case InferenceMode::RAW_ASYNC_SINGLE_THREAD: { @@ -425,10 +425,10 @@ Expected, std::vector>> Netwo } const std::vector NetworkRunner::ALLOWED_INFERENCE_RETURN_VALUES{ - {HAILO_SUCCESS, HAILO_STREAM_ABORTED_BY_USER, HAILO_SHUTDOWN_EVENT_SIGNALED} + {HAILO_SUCCESS, HAILO_STREAM_ABORT, HAILO_SHUTDOWN_EVENT_SIGNALED} }; -FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, +FullSyncNetworkRunner::FullSyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::vector &&input_vstreams, std::vector &&output_vstreams, std::shared_ptr cng) : NetworkRunner(params, name, vdevice, cng), @@ -437,14 +437,15 @@ FullNetworkRunner::FullNetworkRunner(const NetworkParams ¶ms, const 
std::str { } -Expected>> FullNetworkRunner::start_inference_threads(EventPtr shutdown_event, +Expected>> FullSyncNetworkRunner::start_inference_threads(EventPtr shutdown_event, std::shared_ptr net_live_track) { + static const bool SYNC_API = false; std::vector> threads; for (auto &input_vstream : m_input_vstreams) { const auto vstream_params = get_params(input_vstream.name()); - auto writer = WriterWrapper::create(input_vstream, vstream_params, m_overall_latency_meter, - m_params.framerate); + auto writer = WriterWrapper::create(input_vstream, vstream_params, m_vdevice, + m_overall_latency_meter, m_params.framerate, SYNC_API); CHECK_EXPECTED(writer); threads.emplace_back(std::make_unique>("WRITE", @@ -455,8 +456,8 @@ Expected>> FullNetworkRunner::start_inf bool first = true; //TODO: check with multiple outputs for (auto &output_vstream : m_output_vstreams) { - auto reader = ReaderWrapper::create(output_vstream, m_overall_latency_meter, - first ? net_live_track : nullptr); + auto reader = ReaderWrapper::create(output_vstream, m_vdevice, + m_overall_latency_meter, first ? net_live_track : nullptr, SYNC_API); CHECK_EXPECTED(reader); threads.emplace_back(std::make_unique>("READ", @@ -469,12 +470,12 @@ Expected>> FullNetworkRunner::start_inf return threads; } -void FullNetworkRunner::stop() +void FullSyncNetworkRunner::stop() { (void) m_cng->shutdown(); } -std::set FullNetworkRunner::get_input_names() +std::set FullSyncNetworkRunner::get_input_names() { std::set result; @@ -485,7 +486,7 @@ std::set FullNetworkRunner::get_input_names() return result; } -std::set FullNetworkRunner::get_output_names() +std::set FullSyncNetworkRunner::get_output_names() { std::set result; @@ -496,7 +497,7 @@ std::set FullNetworkRunner::get_output_names() return result; } -VStreamParams FullNetworkRunner::get_params(const std::string &name) +VStreamParams FullSyncNetworkRunner::get_params(const std::string &name) { for (const auto ¶ms : m_params.vstream_params) { if (name == params.name) { @@ -552,9 +553,12 @@ Expected FullAsyncNetworkRunner::create_infer_job(const Configure m_overall_latency_meter->add_start_sample(std::chrono::steady_clock::now().time_since_epoch()); } auto job = m_configured_infer_model->run_async(bindings, [=, &inference_status] (const AsyncInferCompletionInfo &completion_info) { + if (HAILO_SUCCESS != completion_info.status) { inference_status = completion_info.status; - LOGGER__ERROR("Failed in infer async request"); + if (HAILO_STREAM_ABORT != completion_info.status) { + LOGGER__ERROR("Failed in infer async request"); + } return; } if (m_overall_latency_meter) { @@ -575,36 +579,6 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut { auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); - std::map inputs_buffer_pool; - const uint8_t const_byte = 0xAB; - for (const auto &input_name : get_input_names()) { - inputs_buffer_pool[input_name] = {}; - auto input_config = m_infer_model->input(input_name); - CHECK_EXPECTED_AS_STATUS(input_config); - - auto params = get_params(input_name); - if (params.input_file_path.empty()) { - auto constant_buffer = Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(constant_buffer); - inputs_buffer_pool[input_name] = constant_buffer.release(); - } else { - auto buffer = read_binary_file(params.input_file_path, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(buffer); - inputs_buffer_pool[input_name] = buffer.release(); - } - } - - 
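A note on the create_infer_job change above: HAILO_STREAM_ABORT (previously HAILO_STREAM_ABORTED_BY_USER) marks a deliberate shutdown, so the callback records it without logging an error. Below is a minimal self-contained sketch of that pattern; the enum values are illustrative stand-ins for the real hailo_status codes, and on_completion is a hypothetical helper, not the diff's actual lambda:

#include <atomic>
#include <cstdio>

// Illustrative stand-ins for the real hailo_status values (the numbers are arbitrary).
enum hailo_status { HAILO_SUCCESS = 0, HAILO_STREAM_ABORT = 77 };

// Records the completion status for the inference loop. An abort is the
// expected way to stop streaming, so only other failures are logged.
static void on_completion(hailo_status status, std::atomic<hailo_status> &inference_status)
{
    if (HAILO_SUCCESS != status) {
        inference_status = status;
        if (HAILO_STREAM_ABORT != status) {
            std::fprintf(stderr, "Failed in infer async request, status = %d\n", status);
        }
    }
}

In the diff itself this logic lives inside the run_async completion lambda, with inference_status captured by reference from the inference loop.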
std::map outputs_buffer_pool; - for (const auto &output_name : get_output_names()) { - outputs_buffer_pool[output_name] = {}; - auto output_config = m_infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output_config); - - auto constant_buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma()); - CHECK_EXPECTED_AS_STATUS(constant_buffer); - outputs_buffer_pool[output_name] = constant_buffer.release(); - } - std::unique_ptr guard = nullptr; if (HAILO_SCHEDULING_ALGORITHM_NONE != m_params.scheduling_algorithm) { auto status = m_configured_infer_model->set_scheduler_threshold(m_params.scheduler_threshold); @@ -624,24 +598,64 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut auto bindings = m_configured_infer_model->create_bindings(); CHECK_EXPECTED_AS_STATUS(bindings); - for (auto &pair : inputs_buffer_pool) { - auto &name = pair.first; - auto &buffer = pair.second; - bindings->input(name)->set_buffer(hailort::MemoryView(buffer)); + std::unordered_map input_buffers; // Keys are inputs names + std::vector output_buffers; + std::vector dma_mapped_buffers; + + const uint8_t const_byte = 0xAB; + for (const auto &name : get_input_names()) { + auto input_config = m_infer_model->input(name); + CHECK_EXPECTED_AS_STATUS(input_config); + + auto params = get_params(name); + auto buffer = params.input_file_path.empty() ? + Buffer::create(input_config->get_frame_size(), const_byte, BufferStorageParams::create_dma()) : + read_binary_file(params.input_file_path, BufferStorageParams::create_dma()); + CHECK_EXPECTED_AS_STATUS(buffer); + CHECK(0 == (buffer->size() % input_config->get_frame_size()), HAILO_INVALID_ARGUMENT, + "Size of data for input '{}' must be a multiple of the frame size {}. 
Received - {}", name, input_config->get_frame_size(), buffer->size());
+        input_buffers.emplace(name, buffer.release());
+
+        for (uint32_t i = 0; i < (input_buffers.at(name).size() / input_config->get_frame_size()); i++) {
+            auto mapped_buffer = DmaMappedBuffer::create(m_vdevice, input_buffers.at(name).data() + (i * input_config->get_frame_size()),
+                input_config->get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
+            CHECK_EXPECTED_AS_STATUS(mapped_buffer);
+            dma_mapped_buffers.emplace_back(mapped_buffer.release());
+        }
     }
-    for (auto &pair : outputs_buffer_pool) {
-        auto &name = pair.first;
-        auto &buffer = pair.second;
-        bindings->output(name)->set_buffer(hailort::MemoryView(buffer));
+
+    for (const auto &name : get_output_names()) {
+        auto output_config = m_infer_model->output(name);
+        CHECK_EXPECTED_AS_STATUS(output_config);
+
+        auto buffer = Buffer::create(output_config->get_frame_size(), 0, BufferStorageParams::create_dma());
+        CHECK_EXPECTED_AS_STATUS(buffer);
+        output_buffers.emplace_back(buffer.release());
+
+        auto mapped_buffer = DmaMappedBuffer::create(m_vdevice, output_buffers.back().data(), output_buffers.back().size(),
+            HAILO_DMA_BUFFER_DIRECTION_D2H);
+        CHECK_EXPECTED_AS_STATUS(mapped_buffer);
+        dma_mapped_buffers.emplace_back(mapped_buffer.release());
+
+        CHECK_SUCCESS(bindings->output(name)->set_buffer(MemoryView(output_buffers.back())));
     }
     FramerateThrottle frame_rate_throttle(m_params.framerate);
     AsyncInferJob last_job;
     auto inference_status = HAILO_SUCCESS;
+    uint32_t frame_id = 0;
     while (HAILO_TIMEOUT == shutdown_event->wait(std::chrono::milliseconds(0)) && (HAILO_SUCCESS == inference_status)) {
         for (uint32_t frames_in_cycle = 0; frames_in_cycle < m_params.batch_size; frames_in_cycle++) {
-            if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(HAILO_INFINITE_TIMEOUT)) {
+            for (const auto &name : get_input_names()) {
+                auto input_config = m_infer_model->input(name);
+                CHECK_EXPECTED_AS_STATUS(input_config);
+                auto offset = (frame_id % (input_buffers.at(name).size() / input_config->get_frame_size())) * input_config->get_frame_size();
+                CHECK_SUCCESS(bindings->input(name)->set_buffer(MemoryView(input_buffers.at(name).data() + offset,
+                    input_config->get_frame_size())));
+            }
+            frame_id++;
+            if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(DEFAULT_TRANSFER_TIMEOUT)) {
                 auto job_exp = create_infer_job(*bindings, net_live_track, frame_rate_throttle, inference_status);
                 CHECK_EXPECTED_AS_STATUS(job_exp);
                 last_job = job_exp.release();
@@ -653,6 +667,7 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut
             last_job.wait(HAILO_INFINITE_TIMEOUT);
         }
     }
+    m_configured_infer_model->shutdown();
     last_job.wait(HAILO_INFINITE_TIMEOUT);
     return inference_status;
@@ -674,8 +689,8 @@ Expected>> RawNetworkRunner::start_infe
     std::vector> threads;
     for (auto &input_stream : m_input_streams) {
         const auto stream_params = get_params(input_stream.get().name());
-        auto writer = WriterWrapper::create(input_stream.get(), stream_params, m_overall_latency_meter,
-            m_params.framerate);
+        auto writer = WriterWrapper::create(input_stream.get(), stream_params, m_vdevice,
+            m_overall_latency_meter, m_params.framerate, async_streams);
         CHECK_EXPECTED(writer);
         if (async_streams) {
@@ -693,8 +708,8 @@ Expected>> RawNetworkRunner::start_infe
     bool first = true; //TODO: check with multiple outputs
     for (auto &output_stream : m_output_streams) {
-        auto reader = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter,
-            first ?
net_live_track : nullptr); + auto reader = ReaderWrapper::create(output_stream.get(), m_vdevice, + m_overall_latency_meter, first ? net_live_track : nullptr, async_streams); CHECK_EXPECTED(reader); if (async_streams) { @@ -717,13 +732,15 @@ Expected>> RawNetworkRunner::start_infe hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, std::shared_ptr net_live_track) { + static const bool ASYNC_API = true; + // Build output wrappers std::vector> reader_wrappers; std::vector output_semaphores; bool is_first_output = true; for (auto &output_stream : m_output_streams) { - auto reader_wrapper = ReaderWrapper::create(output_stream.get(), m_overall_latency_meter, - is_first_output ? net_live_track : nullptr); + auto reader_wrapper = ReaderWrapper::create(output_stream.get(), m_vdevice, + m_overall_latency_meter, is_first_output ? net_live_track : nullptr, ASYNC_API); CHECK_EXPECTED_AS_STATUS(reader_wrapper); is_first_output = false; @@ -731,9 +748,9 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e CHECK_EXPECTED_AS_STATUS(max_queue_size); auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); - CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + CHECK_EXPECTED_AS_STATUS(semaphore); - output_semaphores.emplace_back(semaphore); + output_semaphores.emplace_back(semaphore.release()); reader_wrappers.emplace_back(reader_wrapper.release()); } @@ -742,16 +759,16 @@ hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_e std::vector input_semaphores; for (auto &input_stream : m_input_streams) { auto writer_wrapper = WriterWrapper::create(input_stream.get(), - get_params(input_stream.get().name()), m_overall_latency_meter, m_params.framerate); + get_params(input_stream.get().name()), m_vdevice, m_overall_latency_meter, m_params.framerate, ASYNC_API); CHECK_EXPECTED_AS_STATUS(writer_wrapper); auto max_queue_size = writer_wrapper.value()->get().get_async_max_queue_size(); CHECK_EXPECTED_AS_STATUS(max_queue_size); auto semaphore = Semaphore::create_shared(static_cast(*max_queue_size)); - CHECK_NOT_NULL(semaphore, HAILO_OUT_OF_HOST_MEMORY); + CHECK_EXPECTED_AS_STATUS(semaphore); - input_semaphores.emplace_back(semaphore); + input_semaphores.emplace_back(semaphore.release()); writer_wrappers.emplace_back(writer_wrapper.release()); } diff --git a/hailort/hailortcli/run2/network_runner.hpp b/hailort/hailortcli/run2/network_runner.hpp index d0d0376f..9601172f 100644 --- a/hailort/hailortcli/run2/network_runner.hpp +++ b/hailort/hailortcli/run2/network_runner.hpp @@ -37,10 +37,10 @@ constexpr std::chrono::milliseconds SYNC_EVENT_TIMEOUT(1000); enum class InferenceMode { - FULL, + FULL_SYNC, FULL_ASYNC, - RAW, + RAW_SYNC, RAW_ASYNC, RAW_ASYNC_SINGLE_THREAD, }; @@ -166,7 +166,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = writer->write(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -198,7 +198,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = writer->wait_for_async_ready(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -209,7 +209,7 @@ class NetworkRunner (void)sync_event->signal(); } }); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -243,7 +243,7 @@ class NetworkRunner for (auto i = 
0; i < m_params.batch_size; i++) { auto status = reader->read(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -275,7 +275,7 @@ class NetworkRunner for (auto i = 0; i < m_params.batch_size; i++) { auto status = reader->wait_for_async_ready(); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -286,7 +286,7 @@ class NetworkRunner (void)sync_event->signal(); } }); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { return status; } CHECK_SUCCESS(status); @@ -323,10 +323,10 @@ class NetworkRunner static Expected create_dataset_from_input_file(const std::string &file_path, size_t size); }; -class FullNetworkRunner : public NetworkRunner +class FullSyncNetworkRunner : public NetworkRunner { public: - FullNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, + FullSyncNetworkRunner(const NetworkParams ¶ms, const std::string &name, VDevice &vdevice, std::vector &&input_vstreams, std::vector &&output_vstreams, std::shared_ptr cng); diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp index 6de243e2..4bfdf062 100644 --- a/hailort/hailortcli/run2/run2_command.cpp +++ b/hailort/hailortcli/run2/run2_command.cpp @@ -324,6 +324,7 @@ class Run2 : public CLI::App bool is_ethernet_device() const; void validate_and_set_scheduling_algorithm(); + void validate_mode_supports_service(); std::vector m_network_params; uint32_t m_time_to_run; @@ -346,7 +347,6 @@ class Run2 : public CLI::App std::string m_measure_fw_actions_output_path; }; - Run2::Run2() : CLI::App("Run networks", "run2") { add_measure_fw_actions_subcom(); @@ -354,14 +354,17 @@ Run2::Run2() : CLI::App("Run networks", "run2") add_option("-t,--time-to-run", m_time_to_run, "Time to run (seconds)") ->default_val(DEFAULT_TIME_TO_RUN_SECONDS) ->check(CLI::PositiveNumber); - add_option("-m,--mode", m_mode, "Inference mode") + auto mode = add_option("-m,--mode", m_mode, "Inference mode") ->transform(HailoCheckedTransformer({ - { "full", InferenceMode::FULL }, + { "full_sync", InferenceMode::FULL_SYNC }, + { "full", InferenceMode::FULL_SYNC, OptionVisibility::HIDDEN }, // TODO: Remove option { "full_async", InferenceMode::FULL_ASYNC }, - { "raw", InferenceMode::RAW }, + { "raw_sync", InferenceMode::RAW_SYNC }, + { "raw", InferenceMode::RAW_SYNC, OptionVisibility::HIDDEN }, // TODO: Remove option { "raw_async", InferenceMode::RAW_ASYNC }, { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD, OptionVisibility::HIDDEN } - }))->default_val("full"); + }))->default_val("full_sync"); + add_option("-j,--json", m_stats_json_path, "If set save statistics as json to the specified path") ->default_val("") ->check(FileSuffixValidator(JSON_SUFFIX)); @@ -412,8 +415,12 @@ Run2::Run2() : CLI::App("Run networks", "run2") // When working with service over ip - client doesn't have access to physical devices } + hailo_deprecate_options(this, { std::make_shared(mode, "full", "full_sync"), + std::make_shared(mode, "raw", "raw_sync") }, false); + parse_complete_callback([this]() { validate_and_set_scheduling_algorithm(); + validate_mode_supports_service(); }); } @@ -578,6 +585,14 @@ bool Run2::is_ethernet_device() const return is_valid_ip(m_device_ids[0]); } +void Run2::validate_mode_supports_service() +{ + if (m_multi_process_service) { + PARSE_CHECK(((InferenceMode::FULL_SYNC == m_mode) || 
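// Raw stream modes bypass the vstream pipeline and are not available over the multi-process service.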
(InferenceMode::FULL_ASYNC == m_mode)), + "When running multi-process, only FULL_SYNC or FULL_ASYNC modes are allowed"); + } +} + void Run2::validate_and_set_scheduling_algorithm() { if (m_scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE) { @@ -617,9 +632,9 @@ static hailo_status wait_for_threads(std::vector> & auto last_error_status = HAILO_SUCCESS; for (auto& thread : threads) { auto thread_status = thread->get(); - if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORTED_BY_USER != thread_status)) { + if ((HAILO_SUCCESS != thread_status) && (HAILO_STREAM_ABORT != thread_status)) { last_error_status = thread_status; - LOGGER__ERROR("Thread failed with with status {}", thread_status); + LOGGER__ERROR("Thread failed with status {}", thread_status); } } return last_error_status; @@ -628,12 +643,12 @@ static hailo_status wait_for_threads(std::vector> & std::string get_str_infer_mode(const InferenceMode& infer_mode) { switch(infer_mode){ - case InferenceMode::FULL: - return "full"; + case InferenceMode::FULL_SYNC: + return "full_sync"; case InferenceMode::FULL_ASYNC: return "full_async"; - case InferenceMode::RAW: - return "raw"; + case InferenceMode::RAW_SYNC: + return "raw_sync"; case InferenceMode::RAW_ASYNC: return "raw_async"; case InferenceMode::RAW_ASYNC_SINGLE_THREAD: @@ -682,8 +697,8 @@ Expected> Run2::create_vdevice() CHECK_AS_EXPECTED(!get_multi_process_service(), HAILO_INVALID_OPERATION, "Collecting runtime data is not supported with multi process service"); CHECK_AS_EXPECTED(get_device_count() == 1, HAILO_INVALID_OPERATION, "Collecting runtime data is not supported with multi device"); CHECK_AS_EXPECTED(!(get_measure_hw_latency() || get_measure_overall_latency()), HAILO_INVALID_OPERATION, "Latency measurement is not allowed when collecting runtime data"); - CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, - "'measure-fw-actions' is only supported with '--mode=raw'. Received mode: '{}'", get_str_infer_mode(get_mode())); + CHECK_AS_EXPECTED((get_mode() == InferenceMode::RAW_SYNC) || (get_mode() == InferenceMode::RAW_ASYNC), HAILO_INVALID_OPERATION, + "'measure-fw-actions' is only supported with '--mode=raw_sync' or '--mode=raw_async'. Received mode: '{}'", get_str_infer_mode(get_mode())); } vdevice_params.group_id = get_group_id().c_str(); @@ -725,6 +740,8 @@ Expected>> Run2::init_and_run_net_run auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + activation_barrier.arrive_and_wait(); + if (get_measure_power() || get_measure_current() || get_measure_temp()) { auto physical_devices = vdevice->get_physical_devices(); CHECK_EXPECTED(physical_devices); @@ -732,17 +749,12 @@ Expected>> Run2::init_and_run_net_run for (auto &device : physical_devices.value()) { auto measurement_live_track = MeasurementLiveTrack::create_shared(device.get(), get_measure_power(), get_measure_current(), get_measure_temp()); - if (HAILO_SUCCESS != measurement_live_track.status()) { - activation_barrier.terminate(); - } CHECK_EXPECTED(measurement_live_track); live_stats->add(measurement_live_track.release(), 2); } } - // TODO: wait for all nets before starting timer. start() should update TimerLiveTrack to start. or maybe append here but first in vector... 
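The mode renaming in run2_command.cpp above keeps the old CLI spellings as hidden, deprecated aliases so existing invocations keep working. Conceptually the transformer reduces to a name-to-enum lookup table; a simplified sketch follows (the real code goes through HailoCheckedTransformer and hailo_deprecate_options, which also emits the user-facing deprecation notice):

#include <map>
#include <string>

enum class InferenceMode { FULL_SYNC, FULL_ASYNC, RAW_SYNC, RAW_ASYNC, RAW_ASYNC_SINGLE_THREAD };

// "full" and "raw" are deprecated spellings kept for backwards compatibility;
// they resolve to the same modes as "full_sync" and "raw_sync".
static const std::map<std::string, InferenceMode> MODE_BY_NAME = {
    { "full_sync",               InferenceMode::FULL_SYNC },
    { "full",                    InferenceMode::FULL_SYNC },  // deprecated alias
    { "full_async",              InferenceMode::FULL_ASYNC },
    { "raw_sync",                InferenceMode::RAW_SYNC },
    { "raw",                     InferenceMode::RAW_SYNC },   // deprecated alias
    { "raw_async",               InferenceMode::RAW_ASYNC },
    { "raw_async_single_thread", InferenceMode::RAW_ASYNC_SINGLE_THREAD }, // hidden option
};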
- activation_barrier.arrive_and_wait(); CHECK_SUCCESS_AS_EXPECTED(live_stats->start()); auto status = shutdown_event->wait(get_time_to_run()); if (HAILO_TIMEOUT != status) { diff --git a/hailort/hailortcli/run_command.cpp b/hailort/hailortcli/run_command.cpp index 63edf7c8..55b62a19 100644 --- a/hailort/hailortcli/run_command.cpp +++ b/hailort/hailortcli/run_command.cpp @@ -401,7 +401,7 @@ hailo_status send_loop(const inference_runner_params ¶ms, SendObject &send_o auto status = send_object.write(MemoryView( const_cast(input_buffer->data()) + offset, send_object.get_frame_size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__DEBUG("Input stream was aborted!"); return status; } @@ -692,7 +692,7 @@ static hailo_status run_streaming_impl(std::shared_ptr c auto error_status = HAILO_SUCCESS; for (auto& result : results) { auto status = result->get(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { continue; } if (HAILO_SUCCESS != status) { diff --git a/hailort/hailortcli/udp_rate_limiter_command.cpp b/hailort/hailortcli/udp_rate_limiter_command.cpp index b1978af6..bbcb0517 100644 --- a/hailort/hailortcli/udp_rate_limiter_command.cpp +++ b/hailort/hailortcli/udp_rate_limiter_command.cpp @@ -18,7 +18,7 @@ #define PORTS_COUNT (16) // Should be same as HW_PACKAGE__CORE_PKG__N_AXIS_IN UdpRateLimiterCommand::UdpRateLimiterCommand (CLI::App &parent_app) : - Command(parent_app.add_subcommand("udp-rate-limiter", "Limit UDP rate")) + Command(parent_app.add_subcommand("udp-rate-limiter", "Limit the UDP rate")) { m_set_command = m_app->add_subcommand("set", "Sets the udp rate limit"); m_set_command->add_option("--kbit-rate", m_rate_kbit_sec, "rate in Kbit/s") diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt index b44ab0e3..89df1ba4 100644 --- a/hailort/libhailort/CMakeLists.txt +++ b/hailort/libhailort/CMakeLists.txt @@ -2,23 +2,24 @@ cmake_minimum_required(VERSION 3.0.0) # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") set(HAILORT_MAJOR_VERSION 4) -set(HAILORT_MINOR_VERSION 16) -set(HAILORT_REVISION_VERSION 2) +set(HAILORT_MINOR_VERSION 17) +set(HAILORT_REVISION_VERSION 0) # Add the cmake folder so the modules there are found set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) # Generate hef-proto files using host protobuf -protobuf_generate_cpp(PROTO_HEF_SRC PROTO_HEF_HEADR hef.proto) +protobuf_generate_cpp(PROTO_HEF_SRC PROTO_HEF_HEADER hef.proto) +protobuf_generate_python(PROTO_HEF_PY hef.proto) # TODO (HRT-12504): Copy hef_pb2.py to tools directory -add_library(hef_proto ${PROTO_HEF_SRC} ${PROTO_HEF_HEADR}) +add_library(hef_proto ${PROTO_HEF_SRC} ${PROTO_HEF_HEADER} ${PROTO_HEF_PY}) target_link_libraries(hef_proto libprotobuf-lite) set_target_properties(hef_proto PROPERTIES CXX_STANDARD 14 GENERATED TRUE POSITION_INDEPENDENT_CODE ON) if(CMAKE_HOST_WIN32) # https://github.com/protocolbuffers/protobuf/tree/master/cmake#notes-on-compiler-warnings target_compile_options(hef_proto PRIVATE /wd4244) endif() -get_filename_component(PROTO_HEADER_DIRECTORY ${PROTO_HEF_HEADR} DIRECTORY) +get_filename_component(PROTO_HEADER_DIRECTORY ${PROTO_HEF_HEADER} DIRECTORY) target_include_directories(hef_proto PUBLIC $ diff --git a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt index abe92849..b69addd1 100644 --- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt +++ 
b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt @@ -8,7 +8,7 @@ if(NOT CMAKE_HOST_UNIX) message(FATAL_ERROR "Only unix hosts are supported, stopping build") endif() -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) # GST_PLUGIN_DEFINE needs PACKAGE to be defined set(GST_HAILO_PACKAGE_NAME "hailo") @@ -19,13 +19,14 @@ pkg_search_module(GLIB REQUIRED glib-2.0) pkg_search_module(GSTREAMER REQUIRED gstreamer-1.0) pkg_search_module(GSTREAMER_BASE REQUIRED gstreamer-base-1.0) pkg_search_module(GSTREAMER_VIDEO REQUIRED gstreamer-video-1.0) +pkg_search_module(GSTREAMER_PLUGINS_BASE REQUIRED gstreamer-plugins-base-1.0) add_library(gsthailo SHARED gst-hailo/gsthailoplugin.cpp + gst-hailo/sync_gsthailonet.cpp + gst-hailo/sync_gst_hailosend.cpp + gst-hailo/sync_gst_hailorecv.cpp gst-hailo/gsthailonet.cpp - gst-hailo/gsthailosend.cpp - gst-hailo/gsthailorecv.cpp - gst-hailo/gsthailonet2.cpp gst-hailo/gsthailodevicestats.cpp gst-hailo/common.cpp gst-hailo/network_group_handle.cpp @@ -51,7 +52,7 @@ target_compile_options(gsthailo PRIVATE -DPACKAGE="${GST_HAILO_PACKAGE_NAME}") target_include_directories(gsthailo PRIVATE ${GSTREAMER_VIDEO_INCLUDE_DIRS}) -target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS}) +target_link_libraries(gsthailo HailoRT::libhailort ${GSTREAMER_VIDEO_LDFLAGS} -lgstallocators-1.0) install(TARGETS gsthailo LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp index d4c64216..e1508af7 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -18,498 +18,692 @@ * Boston, MA 02110-1301, USA. 
*/ #include "gsthailonet.hpp" -#include "gsthailosend.hpp" -#include "gsthailorecv.hpp" -#include "hailo_events/hailo_events.hpp" -#include "metadata/hailo_buffer_flag_meta.hpp" +#include "metadata/tensor_meta.hpp" +#include "hailo/buffer.hpp" #include "hailo/hailort_common.hpp" #include "hailo/hailort_defaults.hpp" -#include +#include #include +#include -GST_DEBUG_CATEGORY_STATIC(gst_hailonet_debug_category); -#define GST_CAT_DEFAULT gst_hailonet_debug_category - -constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); - -static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); -static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); -static gboolean gst_hailorecv_src_pad_event(GstPad *pad, GstObject *parent, GstEvent *event); -static GstPadProbeReturn gst_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo *info, gpointer user_data); -static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstStateChange transition); -static void gst_hailonet_flush_callback(GstHailoNet *hailonet, gpointer data); -static void gst_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer udata); -static void gst_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer udata); +#define WAIT_FOR_ASYNC_READY_TIMEOUT (std::chrono::milliseconds(10000)) +#define ERROR(msg, ...) g_print(msg, ##__VA_ARGS__) enum { PROP_0, - PROP_DEBUG, - PROP_DEVICE_ID, PROP_HEF_PATH, - PROP_NETWORK_NAME, PROP_BATCH_SIZE, + PROP_DEVICE_ID, + PROP_DEVICE_COUNT, + PROP_VDEVICE_GROUP_ID, + PROP_IS_ACTIVE, PROP_OUTPUTS_MIN_POOL_SIZE, PROP_OUTPUTS_MAX_POOL_SIZE, - PROP_IS_ACTIVE, - PROP_DEVICE_COUNT, - PROP_VDEVICE_KEY, PROP_SCHEDULING_ALGORITHM, PROP_SCHEDULER_TIMEOUT_MS, PROP_SCHEDULER_THRESHOLD, PROP_SCHEDULER_PRIORITY, - PROP_MULTI_PROCESS_SERVICE, PROP_INPUT_FORMAT_TYPE, PROP_OUTPUT_FORMAT_TYPE, PROP_NMS_SCORE_THRESHOLD, PROP_NMS_IOU_THRESHOLD, PROP_NMS_MAX_PROPOSALS_PER_CLASS, + PROP_INPUT_FROM_META, + PROP_NO_TRANSFORM, + PROP_MULTI_PROCESS_SERVICE, + PROP_PASS_THROUGH, + PROP_FORCE_WRITABLE, + + // Deprecated + PROP_VDEVICE_KEY, }; -G_DEFINE_TYPE(GstHailoNet, gst_hailonet, GST_TYPE_BIN); +static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); +static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); -static void gst_hailonet_class_init(GstHailoNetClass *klass) +G_DEFINE_TYPE (GstHailoAllocator, gst_hailo_allocator, GST_TYPE_ALLOCATOR); +G_DEFINE_TYPE (GstHailoNet, gst_hailonet, GST_TYPE_ELEMENT); + +static std::atomic_uint32_t hailonet_count(0); + +static bool gst_hailo_should_use_dma_buffers() { - GObjectClass *gobject_class = G_OBJECT_CLASS(klass); - GstElementClass *element_class = GST_ELEMENT_CLASS(klass); + const char *env = g_getenv(GST_HAILO_USE_DMA_BUFFER_ENV_VAR); + return (nullptr != env) && (0 == g_strcmp0(env, "1")); +} - GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); +static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size, GstAllocationParams* /*params*/) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + auto buffer = Buffer::create(size, BufferStorageParams::create_dma()); + if (!buffer) { + ERROR("Creating 
buffer for allocator has failed, status = %d\n", buffer.status()); + return nullptr; + } - GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template)); + GstMemory *memory = gst_memory_new_wrapped(static_cast(0), buffer->data(), + buffer->size(), 0, buffer->size(), nullptr, nullptr); + if (nullptr == memory) { + ERROR("Creating new GstMemory for allocator has failed!\n"); + return nullptr; + } - gst_element_class_set_static_metadata(element_class, - "hailonet element", "Hailo/Network", - "Configure and Activate Hailo Network. " - "Supports the \"flush\" signal which blocks until there are no buffers currently processesd in the element. " - "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " - "hailonet, since this operation waits until there are no frames coming in.", - PLUGIN_AUTHOR); + hailo_allocator->buffers[memory] = std::move(buffer.release()); + return memory; +} - element_class->change_state = GST_DEBUG_FUNCPTR(gst_hailonet_change_state); - - gobject_class->set_property = gst_hailonet_set_property; - gobject_class->get_property = gst_hailonet_get_property; - g_object_class_install_property(gobject_class, PROP_DEBUG, - g_param_spec_boolean("debug", "Debug flag", "Should print debug information", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_ID, - g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, - g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, - std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, - g_param_spec_uint("vdevice-key", - "Indicate whether to re-use or re-create vdevice", - "Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." \ - "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", - MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_HEF_PATH, - g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NETWORK_NAME, - g_param_spec_string("net-name", "Network Name", - "Configure and run this specific network. 
" - "If not passed, configure and run the default network - ONLY if there is one network in the HEF!", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, - g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, - g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", - 0, std::numeric_limits::max(), DEFAULT_OUTPUTS_MIN_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, - g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", - "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), - DEFAULT_OUTPUTS_MAX_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, - g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " - "By default, the hailonet element will not be active unless it is the only one. " - "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) { + GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); + hailo_allocator->buffers.erase(mem); +} - g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, - g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " - "Gets values from the enum GstHailoSchedulingAlgorithms. " - "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " - "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. 
", - GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, - g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," - " as long as at least one send request has been sent.", - HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, - g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", - HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, - g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. " - "Bigger number represent higher priority", - HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, - g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " - "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", - HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, - g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, - g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, - g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. 
Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); +static void gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) { + GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass); - // See information about the "flush" signal in the element description - g_signal_new( - "flush", - GST_TYPE_HAILONET, - G_SIGNAL_ACTION, - 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 - ); + allocator_class->alloc = gst_hailo_allocator_alloc; + allocator_class->free = gst_hailo_allocator_free; } -std::string create_name(std::string prefix, uint32_t id) +static void gst_hailo_allocator_init(GstHailoAllocator* allocator) { + allocator->buffers = std::unordered_map(); +} + +static hailo_status gst_hailonet_deconfigure(GstHailoNet *self) { - return prefix + std::to_string(id); + // This will wakeup any blocking calls to deuque + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE); + } + + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->is_configured = false; + return HAILO_SUCCESS; } -Expected> HailoNetImpl::create(GstHailoNet *element) +static hailo_status gst_hailonet_free(GstHailoNet *self) { - if (nullptr == element) { - return make_unexpected(HAILO_INVALID_ARGUMENT); + std::unique_lock lock(self->infer_mutex); + self->configured_infer_model.reset(); + self->infer_model.reset(); + self->vdevice.reset(); + + { + std::unique_lock lock(self->thread_queue_mutex); + self->is_thread_running = false; } + self->thread_cv.notify_all(); - auto hailosend_name = create_name("hailosend", HailoNetImpl::m_hailonet_count); - GstElement *hailosend = gst_element_factory_make("hailosend", hailosend_name.c_str()); - if (nullptr == hailosend) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailosend element in bin!"), (NULL)); - return make_unexpected(HAILO_INTERNAL_FAILURE); + if (self->thread.joinable()) { + self->thread.join(); } - g_object_set(hailosend, "qos", FALSE, NULL); + if (nullptr != self->input_queue) { + gst_queue_array_free(self->input_queue); + } - auto hailoqueue_name = create_name("hailoqueue", HailoNetImpl::m_hailonet_count); - GstElement *queue = gst_element_factory_make("queue", hailoqueue_name.c_str()); - if (nullptr == queue) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating queue element in bin!"), (NULL)); - gst_object_unref(hailosend); - return make_unexpected(HAILO_INTERNAL_FAILURE); + if (nullptr != self->thread_queue) { + gst_queue_array_free(self->thread_queue); } - // Passing 0 disables the features here - g_object_set(queue, "max-size-time", (guint64)0, NULL); - g_object_set(queue, "max-size-bytes", (guint)0, NULL); - g_signal_connect(queue, "overrun", G_CALLBACK(gst_hailonet_inner_queue_overrun_callback), nullptr); - g_signal_connect(queue, "underrun", G_CALLBACK(gst_hailonet_inner_queue_underrun_callback), nullptr); + if (nullptr != self->input_caps) { + gst_caps_unref(self->input_caps); + } - auto hailorecv_name = create_name("hailorecv", HailoNetImpl::m_hailonet_count); - GstElement *hailorecv = gst_element_factory_make("hailorecv", hailorecv_name.c_str()); - if (nullptr == hailorecv) { - GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailorecv element in bin!"), (NULL)); - gst_object_unref(hailosend); - gst_object_unref(queue); - return make_unexpected(HAILO_INTERNAL_FAILURE); + for (auto &name_pool_pair : self->output_buffer_pools) { + gboolean result = 
gst_buffer_pool_set_active(name_pool_pair.second, FALSE); + CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool"); + gst_object_unref(name_pool_pair.second); + } + if (gst_hailo_should_use_dma_buffers()) { + gst_object_unref(self->dma_allocator); + } else { + gst_object_unref(self->allocator); } - g_object_set(hailorecv, "qos", FALSE, NULL); + self->props.free_strings(); - g_signal_connect(element, "flush", G_CALLBACK(gst_hailonet_flush_callback), nullptr); + return HAILO_SUCCESS; +} - auto was_flushed_event = Event::create_shared(Event::State::not_signalled); - GST_CHECK_EXPECTED(was_flushed_event, element, RESOURCE, "Failed allocating memory for event!"); +static hailo_status gst_hailonet_set_format_types(GstHailoNet *self, std::shared_ptr infer_model) +{ + if (self->props.m_input_format_type.was_changed()) { + for (const auto &input_name : infer_model->get_input_names()) { + auto input = infer_model->input(input_name); + CHECK_EXPECTED_AS_STATUS(input); - auto ptr = make_unique_nothrow(element, hailosend, queue, hailorecv, was_flushed_event.release()); - if (nullptr == ptr) { - return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); + input->set_format_type(self->props.m_input_format_type.get()); + } } + if (self->props.m_output_format_type.was_changed()) { + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); - return ptr; + output->set_format_type(self->props.m_output_format_type.get()); + } + } + + return HAILO_SUCCESS; } -std::atomic_uint32_t HailoNetImpl::m_hailonet_count(0); -std::mutex HailoNetImpl::m_mutex; -HailoNetImpl::HailoNetImpl(GstHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event) : - m_element(element), m_props(), m_output_formats(), m_hailosend(hailosend), m_queue(queue), m_hailorecv(hailorecv), - m_net_group_handle(nullptr), m_was_configured(false), m_has_called_activate(false), - m_was_flushed_event(was_flushed_event), m_pool(nullptr) +static hailo_status gst_hailonet_set_nms_params(GstHailoNet *self, std::shared_ptr infer_model) { - GST_DEBUG_CATEGORY_INIT(gst_hailonet_debug_category, "hailonet", 0, "debug category for hailonet element"); + // Check that if one of the NMS params are changed, we have NMS outputs in the model + auto has_nms_output = std::any_of(infer_model->outputs().begin(), infer_model->outputs().end(), [](const auto &output) + { + return output.is_nms(); + }); + + for (const auto &output_name : infer_model->get_output_names()) { + auto output = infer_model->output(output_name); + CHECK_EXPECTED_AS_STATUS(output); - /* gst_bin_add_many cannot fail. 
I use this function because the elements are created here and does not come from the outside so, - * gst_bin_add will not fail */ - gst_bin_add_many(GST_BIN(m_element), m_hailosend, m_queue, m_hailorecv, NULL); - init_ghost_sink(); - init_ghost_src(); + if (self->props.m_nms_score_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_score_threshold(self->props.m_nms_score_threshold.get()); + } + } + if (self->props.m_nms_iou_threshold.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_iou_threshold(self->props.m_nms_iou_threshold.get()); + } + } + if (self->props.m_nms_max_proposals_per_class.was_changed()) { + CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model."); + if (output->is_nms()) { + output->set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get()); + } + } + } - ++m_hailonet_count; + return HAILO_SUCCESS; } -HailoNetImpl::~HailoNetImpl() +static hailo_status gst_hailonet_set_scheduler_params(GstHailoNet *self, std::shared_ptr configured_infer_model) { - if (nullptr != m_pool) { - (void)gst_buffer_pool_set_active(m_pool, FALSE); + if (self->props.m_scheduler_timeout_ms.was_changed()) { + auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get()); + auto status = configured_infer_model->set_scheduler_timeout(millis); + CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status); } + if (self->props.m_scheduler_threshold.was_changed()) { + auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get()); + CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status); + } + if (self->props.m_scheduler_priority.was_changed()) { + auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get()); + CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status); + } + + return HAILO_SUCCESS; } -void HailoNetImpl::init_ghost_sink() +static Expected gst_hailonet_create_buffer_pool(GstHailoNet *self, size_t frame_size) { - GstPad *pad = gst_element_get_static_pad(m_hailosend, "sink"); + GstBufferPool *pool = gst_buffer_pool_new(); + + GstStructure *config = gst_buffer_pool_get_config(pool); + gst_buffer_pool_config_set_params(config, nullptr, static_cast(frame_size), self->props.m_outputs_min_pool_size.get(), + self->props.m_outputs_max_pool_size.get()); - GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&sink_template); + if (gst_hailo_should_use_dma_buffers()) { + gst_buffer_pool_config_set_allocator(config, self->dma_allocator, nullptr); + } else { + gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr); + } - GstPad *ghost_pad = gst_ghost_pad_new_from_template("sink", pad, pad_tmpl); - gst_pad_set_active(ghost_pad, TRUE); - gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + gboolean result = gst_buffer_pool_set_config(pool, config); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set config buffer pool"); - gst_pad_add_probe(pad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet_sink_probe), 
nullptr, nullptr); + result = gst_buffer_pool_set_active(pool, TRUE); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set buffer pool as active"); - gst_object_unref(pad_tmpl); - gst_object_unref(pad); + return pool; } -void HailoNetImpl::init_ghost_src() +static hailo_status gst_hailonet_configure(GstHailoNet *self) { - GstPad *pad = gst_element_get_static_pad(m_hailorecv, "src"); + if (self->is_configured) { + return HAILO_SUCCESS; + } + + for (auto &name_pool_pair : self->output_buffer_pools) { + gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE); + } + + self->infer_model->set_batch_size(self->props.m_batch_size.get()); + + auto status = gst_hailonet_set_format_types(self, self->infer_model); + CHECK_SUCCESS(status); + + status = gst_hailonet_set_nms_params(self, self->infer_model); + CHECK_SUCCESS(status); + + // In RGB formats, Gstreamer is padding each row to 4. + for (const auto &input_name : self->infer_model->get_input_names()) { + if(self->props.m_no_transform.get()) { + // In case transformation is disabled - format order will be the same as we get from the HW (stream info). + auto input_stream_infos = self->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM); + CHECK_EXPECTED_AS_STATUS(input_stream_infos); + self->infer_model->input(input_name)->set_format_order(input_stream_infos.value().format.order); + } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { + self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4); + } + } + + if (self->props.m_no_transform.get()) { + for (const auto &output_name : self->infer_model->get_output_names()) { + // In case transformation is disabled - format order will be the same as we get from the HW (stream info). 
+ auto output_stream_infos = self->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM); + CHECK_EXPECTED_AS_STATUS(output_stream_infos); + self->infer_model->output(output_name)->set_format_order(output_stream_infos.value().format.order); + } + } - GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&src_template); + auto configured_infer_model = self->infer_model->configure(); + CHECK_EXPECTED_AS_STATUS(configured_infer_model); - GstPad *ghost_pad = gst_ghost_pad_new_from_template("src", pad, pad_tmpl); - gst_pad_set_active(ghost_pad, TRUE); - gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + auto ptr = make_shared_nothrow(configured_infer_model.release()); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + self->configured_infer_model = ptr; - gst_pad_set_event_function(pad, gst_hailorecv_src_pad_event); + status = gst_hailonet_set_scheduler_params(self, self->configured_infer_model); + CHECK_SUCCESS(status); - gst_object_unref(pad_tmpl); - gst_object_unref(pad); + self->is_configured = true; + return HAILO_SUCCESS; } -void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self) { - GST_DEBUG_OBJECT(m_element, "set_property"); + auto bindings = self->configured_infer_model->create_bindings(); + CHECK_EXPECTED_AS_STATUS(bindings); + self->infer_bindings = std::move(bindings.release()); + + self->output_buffer_pools = std::unordered_map(); + self->output_vstream_infos = std::unordered_map(); + + auto async_queue_size = self->configured_infer_model->get_async_queue_size(); + CHECK_EXPECTED_AS_STATUS(async_queue_size); + self->input_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->thread_queue = gst_queue_array_new(static_cast(async_queue_size.value())); + self->is_thread_running = true; + self->thread = std::thread([self] () { + while (self->is_thread_running) { + GstBuffer *buffer = nullptr; + { + std::unique_lock lock(self->thread_queue_mutex); + self->thread_cv.wait(lock, [self] () { + return (self->buffers_in_thread_queue > 0) || !self->is_thread_running; + }); + if (!self->is_thread_running) { + break; + } - if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { - g_error("set_property got null parameter!"); - return; + buffer = static_cast(gst_queue_array_pop_head(self->thread_queue)); + self->buffers_in_thread_queue--; + } + self->thread_cv.notify_all(); + if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application + GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); + if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && (!self->has_got_eos)) { + ERROR("gst_pad_push failed with status = %d\n", ret); + break; + } + } + } + }); + + for (auto &output : self->infer_model->outputs()) { + auto buffer_pool = gst_hailonet_create_buffer_pool(self, output.get_frame_size()); + CHECK_EXPECTED_AS_STATUS(buffer_pool); + + self->output_buffer_pools[output.name()] = buffer_pool.release(); } - switch (property_id) { - case PROP_DEBUG: + auto vstream_infos = self->infer_model->hef().get_output_vstream_infos(); + CHECK_EXPECTED_AS_STATUS(vstream_infos); + + for (const auto &vstream_info : vstream_infos.value()) { + self->output_vstream_infos[vstream_info.name] = vstream_info; + } + + return HAILO_SUCCESS; +} + +static GstStateChangeReturn 
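// State transitions handled below:
//   PAUSED  -> PLAYING : gst_hailonet_configure() builds the configured infer model
//   PLAYING -> PAUSED  : gst_hailonet_deconfigure() flushes the output pools and drops it
//   READY   -> NULL    : gst_hailonet_free() joins the push thread and releases all resources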
gst_hailonet_change_state(GstElement *element, GstStateChange transition) +{ + GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet_parent_class)->change_state(element, transition); + if (GST_STATE_CHANGE_FAILURE == ret) { + return ret; + } + + GstHailoNet *self = GST_HAILONET(element); + std::unique_lock lock(self->sink_probe_change_state_mutex); + + switch (transition) { + case GST_STATE_CHANGE_PAUSED_TO_PLAYING: + { + auto status = gst_hailonet_configure(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_PLAYING_TO_PAUSED: + { + auto status = gst_hailonet_deconfigure(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } + break; + } + case GST_STATE_CHANGE_READY_TO_NULL: { - gboolean debug = g_value_get_boolean(value); - g_object_set(m_hailosend, "debug", debug, NULL); - g_object_set(m_hailorecv, "debug", debug, NULL); + auto status = gst_hailonet_free(self); + if (HAILO_SUCCESS != status) { + return GST_STATE_CHANGE_FAILURE; + } break; } + default: + break; + } + + return ret; +} + +static hailo_status gst_hailonet_toggle_activation(GstHailoNet *self, gboolean old_is_active, gboolean new_is_active) +{ + std::unique_lock lock(self->infer_mutex); + + if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + return HAILO_INVALID_OPERATION; + } + + if (self->has_called_activate) { + // Should we keep this? If the user changes the is-active property when we are not configured, it's his fault. + if (!self->is_configured) { + g_warning("Trying to change is-active property when network is not configured!"); + return HAILO_INVALID_OPERATION; + } + if (old_is_active && !new_is_active) { + self->configured_infer_model->deactivate(); + } else if (!old_is_active && new_is_active) { + auto status = self->configured_infer_model->activate(); + CHECK_SUCCESS(status); + } else { + g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active); + } + } + + self->props.m_is_active = new_is_active; + return HAILO_SUCCESS; +} + +static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GstHailoNet *self = GST_HAILONET(object); + switch (property_id) { + case PROP_HEF_PATH: + if (self->is_configured) { + g_warning("The network was already configured so changing the HEF path will not take place!"); + break; + } + if (nullptr != self->props.m_hef_path.get()) { + g_free(self->props.m_hef_path.get()); + } + self->props.m_hef_path = g_strdup(g_value_get_string(value)); + break; + case PROP_BATCH_SIZE: + if (self->is_configured) { + g_warning("The network was already configured so changing the batch size will not take place!"); + break; + } + self->props.m_batch_size = static_cast(g_value_get_uint(value)); + break; case PROP_DEVICE_ID: - if (0 != m_props.m_device_count.get()) { + if (0 != self->props.m_device_count.get()) { g_error("device-id and device-count excludes eachother. 
received device-id=%s, device-count=%d", - g_value_get_string(value), m_props.m_device_count.get()); + g_value_get_string(value), self->props.m_device_count.get()); break; } - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the device ID will not take place!"); break; } - if (nullptr != m_props.m_device_id.get()) { - g_free(m_props.m_device_id.get()); + if (nullptr != self->props.m_device_id.get()) { + g_free(self->props.m_device_id.get()); } - m_props.m_device_id = g_strdup(g_value_get_string(value)); + self->props.m_device_id = g_strdup(g_value_get_string(value)); break; case PROP_DEVICE_COUNT: - if (nullptr != m_props.m_device_id.get()) { + if (nullptr != self->props.m_device_id.get()) { g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", - m_props.m_device_id.get(), g_value_get_uint(value)); + self->props.m_device_id.get(), g_value_get_uint(value)); break; } - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the device count will not take place!"); break; } - m_props.m_device_count = static_cast(g_value_get_uint(value)); + self->props.m_device_count = static_cast(g_value_get_uint(value)); break; - case PROP_VDEVICE_KEY: - if (m_was_configured) { - g_warning("The network was already configured so changing the vdevice key will not take place!"); + case PROP_VDEVICE_GROUP_ID: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice group ID will not take place!"); break; } - m_props.m_vdevice_key = static_cast(g_value_get_uint(value)); - break; - case PROP_HEF_PATH: - if (m_was_configured) { - g_warning("The network was already configured so changing the HEF path will not take place!"); - break; + if (nullptr != self->props.m_vdevice_group_id.get()) { + g_free(self->props.m_vdevice_group_id.get()); } - if (nullptr != m_props.m_hef_path.get()) { - g_free(m_props.m_hef_path.get()); - } - m_props.m_hef_path = g_strdup(g_value_get_string(value)); + self->props.m_vdevice_group_id = g_strdup(g_value_get_string(value)); break; - case PROP_NETWORK_NAME: - if (m_was_configured) { - g_warning("The network was already configured so changing the network name will not take place!"); - break; - } - if (nullptr != m_props.m_network_name.get()) { - g_free(m_props.m_network_name.get()); - } - m_props.m_network_name = g_strdup(g_value_get_string(value)); + case PROP_IS_ACTIVE: + (void)gst_hailonet_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value)); break; - case PROP_BATCH_SIZE: - if (m_was_configured) { - g_warning("The network was already configured so changing the batch size will not take place!"); - break; - } - m_props.m_batch_size = static_cast(g_value_get_uint(value)); + case PROP_PASS_THROUGH: + self->props.m_pass_through = g_value_get_boolean(value); + break; + case PROP_FORCE_WRITABLE: + self->props.m_should_force_writable = g_value_get_boolean(value); break; case PROP_OUTPUTS_MIN_POOL_SIZE: - if (m_was_configured) { - g_warning("The network was already configured so changing the outputs minimum pool size will not take place!"); + if (self->is_configured) { + g_warning("The network has already been configured, the output's minimum pool size cannot be changed!"); break; } - g_object_set(m_hailorecv, "outputs-min-pool-size", g_value_get_uint(value), NULL); + self->props.m_outputs_min_pool_size = g_value_get_uint(value); break; case 
PROP_OUTPUTS_MAX_POOL_SIZE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); break; } - g_object_set(m_hailorecv, "outputs-max-pool-size", g_value_get_uint(value), NULL); - break; - case PROP_IS_ACTIVE: - { - gboolean new_is_active = g_value_get_boolean(value); - - if (m_props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get())) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - break; - } - - if (m_has_called_activate) { - if (m_props.m_is_active.get() && !new_is_active) { - // Setting this to false before deactivating to signal hailosend and hailorecv to stop inferring - m_props.m_is_active = false; - hailo_status status = deactivate_network_group(); - if (HAILO_SUCCESS != status) { - g_error("Deactivating network group failed, status = %d", status); - return; - } - } else if (!m_props.m_is_active.get() && new_is_active) { - hailo_status status = m_net_group_handle->activate_network_group(); - if (HAILO_SUCCESS != status) { - g_error("Failed activating network group, status = %d", status); - break; - } - m_props.m_is_active = true; - } else { - g_warning("Trying to change the is-active property state from %d to %d", m_props.m_is_active.get(), new_is_active); - break; - } - } else { - m_props.m_is_active = new_is_active; - } + self->props.m_outputs_max_pool_size = g_value_get_uint(value); break; - } case PROP_SCHEDULING_ALGORITHM: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); break; } - if (m_props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { + if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); break; } - m_props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value)); + self->props.m_scheduling_algorithm = static_cast<hailo_scheduling_algorithm_t>(g_value_get_enum(value)); break; case PROP_SCHEDULER_TIMEOUT_MS: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling timeout will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage (scheduler-timeout-ms) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_timeout_ms = g_value_get_uint(value); + self->props.m_scheduler_timeout_ms = g_value_get_uint(value); break; case PROP_SCHEDULER_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling threshold will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage (scheduler-threshold) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_threshold = g_value_get_uint(value); + self->props.m_scheduler_threshold = g_value_get_uint(value); break; case PROP_SCHEDULER_PRIORITY: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the scheduling priority will not take place!"); break; } - if (m_props.m_is_active.was_changed()) { - g_error("scheduler usage 
(scheduler-priority) in combination with 'is-active' is not supported."); - break; - } - m_props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value)); - break; - case PROP_MULTI_PROCESS_SERVICE: - if (m_was_configured) { - g_warning("The network was already configured so changing the multi-process-service property will not take place!"); - break; - } - m_props.m_multi_process_service = g_value_get_boolean(value); + self->props.m_scheduler_priority = static_cast<guint8>(g_value_get_uint(value)); break; case PROP_INPUT_FORMAT_TYPE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the format type will not take place!"); break; } - m_props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); + self->props.m_input_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); break; case PROP_OUTPUT_FORMAT_TYPE: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the format type will not take place!"); break; } - m_props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); + self->props.m_output_format_type = static_cast<hailo_format_type_t>(g_value_get_enum(value)); break; case PROP_NMS_SCORE_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the score threshold will not take place!"); break; } - m_props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value)); + self->props.m_nms_score_threshold = static_cast<gfloat>(g_value_get_float(value)); break; case PROP_NMS_IOU_THRESHOLD: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the IoU threshold will not take place!"); break; } - m_props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value)); + self->props.m_nms_iou_threshold = static_cast<gfloat>(g_value_get_float(value)); break; case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - if (m_was_configured) { + if (self->is_configured) { g_warning("The network was already configured so changing the max proposals per class will not take place!"); break; } - m_props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value)); + self->props.m_nms_max_proposals_per_class = static_cast<guint32>(g_value_get_uint(value)); + break; + case PROP_INPUT_FROM_META: + if (self->is_configured) { + g_warning("The network was already configured so changing the input method will not take place!"); + break; + } + self->props.m_input_from_meta = g_value_get_boolean(value); + break; + case PROP_NO_TRANSFORM: + if (self->is_configured) { + g_warning("The network was already configured so disabling the transformation will not take place!"); + break; + } + self->props.m_no_transform = g_value_get_boolean(value); + break; + case PROP_MULTI_PROCESS_SERVICE: + if (self->is_configured) { + g_warning("The network was already configured so changing the multi-process-service property will not take place!"); + break; + } + self->props.m_multi_process_service = g_value_get_boolean(value); + break; + + // Deprecated + case PROP_VDEVICE_KEY: + if (self->is_configured) { + g_warning("The network was already configured so changing the vdevice key will not take place!"); + break; + } + self->props.m_vdevice_key = static_cast<guint32>(g_value_get_uint(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +}
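The props fields above follow a small "tracked property" pattern: each property remembers whether it was explicitly set, which is what the was_changed() checks rely on. A minimal sketch of what such a wrapper could look like (the actual implementation lives in common.hpp and may differ in detail):

    // Sketch only: a value plus a "was it explicitly set?" flag.
    template <typename T>
    class TrackedProperty final {
    public:
        explicit TrackedProperty(T default_value) : m_value(default_value), m_was_changed(false) {}
        TrackedProperty &operator=(T value) { m_value = value; m_was_changed = true; return *this; }
        T get() const { return m_value; }
        bool was_changed() const { return m_was_changed; }
    private:
        T m_value;
        bool m_was_changed;
    };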
+ +static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GstHailoNet *self = GST_HAILONET(object); + switch (property_id) { + case PROP_HEF_PATH: + g_value_set_string(value, self->props.m_hef_path.get()); + break; + case PROP_BATCH_SIZE: + g_value_set_uint(value, self->props.m_batch_size.get()); + break; + case PROP_DEVICE_ID: + g_value_set_string(value, self->props.m_device_id.get()); + break; + case PROP_DEVICE_COUNT: + g_value_set_uint(value, self->props.m_device_count.get()); + break; + case PROP_VDEVICE_GROUP_ID: + g_value_set_string(value, self->props.m_vdevice_group_id.get()); + break; + case PROP_IS_ACTIVE: + g_value_set_boolean(value, self->props.m_is_active.get()); + break; + case PROP_PASS_THROUGH: + g_value_set_boolean(value, self->props.m_pass_through.get()); + break; + case PROP_FORCE_WRITABLE: + g_value_set_boolean(value, self->props.m_should_force_writable.get()); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_min_pool_size.get()); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + g_value_set_uint(value, self->props.m_outputs_max_pool_size.get()); + break; + case PROP_SCHEDULING_ALGORITHM: + g_value_set_enum(value, self->props.m_scheduling_algorithm.get()); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get()); + break; + case PROP_SCHEDULER_THRESHOLD: + g_value_set_uint(value, self->props.m_scheduler_threshold.get()); + break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, self->props.m_scheduler_priority.get()); + break; + case PROP_INPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_input_format_type.get()); + break; + case PROP_OUTPUT_FORMAT_TYPE: + g_value_set_enum(value, self->props.m_output_format_type.get()); + break; + case PROP_NMS_SCORE_THRESHOLD: + g_value_set_float(value, self->props.m_nms_score_threshold.get()); + break; + case PROP_NMS_IOU_THRESHOLD: + g_value_set_float(value, self->props.m_nms_iou_threshold.get()); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get()); + break; + case PROP_INPUT_FROM_META: + g_value_set_boolean(value, self->props.m_input_from_meta.get()); + break; + case PROP_NO_TRANSFORM: + g_value_set_boolean(value, self->props.m_no_transform.get()); + break; + case PROP_MULTI_PROCESS_SERVICE: + g_value_set_boolean(value, self->props.m_multi_process_service.get()); + break; + + // Deprecated + case PROP_VDEVICE_KEY: + g_value_set_uint(value, self->props.m_vdevice_key.get()); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); @@ -517,476 +711,763 @@ void HailoNetImpl::set_property(GObject *object, guint property_id, const GValue } }
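For orientation before the class initializer below, typical element usage from an application looks like this (a hedged sketch; the HEF path and caps are placeholders that depend on the model):

    // Hypothetical pipeline; hailonet negotiates its sink caps from the HEF.
    GError *error = nullptr;
    GstElement *pipeline = gst_parse_launch(
        "videotestsrc ! video/x-raw,format=RGB,width=640,height=640 ! "
        "hailonet hef-path=/path/to/model.hef batch-size=1 ! fakesink",
        &error);
    if (nullptr == pipeline) {
        g_printerr("Failed to create pipeline: %s\n", error->message);
        g_clear_error(&error);
    }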
" + "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " + "hailonet, since this operation waits until there are no frames coming in.", + PLUGIN_AUTHOR); + + gobject_class->set_property = gst_hailonet_set_property; + gobject_class->get_property = gst_hailonet_get_property; + g_object_class_install_property(gobject_class, PROP_HEF_PATH, + g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", nullptr, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, + g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", + MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, + g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", + 0, std::numeric_limits::max(), MIN_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, + g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", + "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), + MAX_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_DEVICE_ID, + g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, + g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, + std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_VDEVICE_GROUP_ID, + g_param_spec_string("vdevice-group-id", + "VDevice Group ID to share vdevices across hailonets", + "Used to share VDevices across different hailonet instances", HAILO_DEFAULT_VDEVICE_GROUP_ID, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // TODO (HRT-12306): Change is-active behavior + g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, + g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " + "By default, the hailonet element will not be active unless it is the only one. " + "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_PASS_THROUGH, + g_param_spec_boolean("pass-through", "Is element pass-through", "Controls whether the element will perform inference or simply pass buffers through. " + "By default, the hailonet element will not be pass-through. 
" + "Setting this property to true disables inference, regardless of the scheduler settings.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_FORCE_WRITABLE, + g_param_spec_boolean("force-writable", "Force writable", "Controls whether the element will force the input buffer to be writable. " + "We force the input to be writable with the function gst_buffer_make_writable, which in most cases will do a shallow copy of the buffer. " + "But in some cases (when the buffer is marked as not shared - see gst_buffer_copy documentation), it will do a deep copy." + "By default, the hailonet element will not force the input buffer to be writable and will raise an error when the buffer is read-only.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, + g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " + "Gets values from the enum GstHailoSchedulingAlgorithms. " + "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " + "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", + GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, + g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," + " as long as at least one send request has been sent.", + HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, + g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", + HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. " + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, + g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, + g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. 
", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_INPUT_FROM_META, + g_param_spec_boolean("input-from-meta", "Enable input from meta", "Take network input from metadata instead of video frame.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NO_TRANSFORM, + g_param_spec_boolean("no-transform", "Disable transformations", "Format will remain the same as the HW format.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, + g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, + g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " + "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", + HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // Deprecated + g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, + g_param_spec_uint("vdevice-key", + "Deprecated: Indicate whether to re-use or re-create vdevice", + "Deprecated: Use vdevice-group-id instead. Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." \ + "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", + MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // See information about the "flush" signal in the element description + g_signal_new( + "flush", + GST_TYPE_HAILONET, + G_SIGNAL_ACTION, + 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 + ); +} + +static void gst_hailonet_push_buffer_to_thread(GstHailoNet *self, GstBuffer *buffer) { - GST_DEBUG_OBJECT(m_element, "get_property"); - - if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { - g_error("get_property got null parameter!"); - return; + { + std::unique_lock lock(self->thread_queue_mutex); + self->thread_cv.wait(lock, [self] () { + bool is_unlimited_pool_not_empty = (self->props.m_outputs_max_pool_size.get() == 0) && (self->buffers_in_thread_queue < MAX_OUTPUTS_POOL_SIZE); + bool is_pool_empty = self->buffers_in_thread_queue < self->props.m_outputs_max_pool_size.get(); + return is_unlimited_pool_not_empty || is_pool_empty; + }); + gst_queue_array_push_tail(self->thread_queue, buffer); + self->buffers_in_thread_queue++; } + self->thread_cv.notify_all(); +} - switch (property_id) { - case PROP_DEBUG: - { - gboolean debug; - g_object_get(m_hailosend, "debug", &debug, nullptr); - g_value_set_boolean(value, debug); - break; +// TODO: This function should be refactored. It does many unrelated things and the user need to know that he should unmap the buffer +// in case of an error. AND it does not print errors nor return an indicative status (also the comments are confusing - "continue"?) 
- switch (property_id) { - case PROP_DEBUG: - { - gboolean debug; - g_object_get(m_hailosend, "debug", &debug, nullptr); - g_value_set_boolean(value, debug); - break; +// TODO: This function should be refactored. It does many unrelated things, and the caller needs to know to unmap the buffer +// in case of an error. It also does not print errors nor return an indicative status (and the comments are confusing - "continue"?) +static bool set_infos(GstParentBufferMeta *parent_buffer_meta, hailo_vstream_info_t &vstream_info, GstMapInfo &info) +{ + gboolean map_succeeded = gst_buffer_map(parent_buffer_meta->buffer, &info, GST_MAP_READ); + if (!map_succeeded) { + // Failed to map, this buffer might not have a GstHailoTensorMeta, continue + return false; } - case PROP_DEVICE_ID: - g_value_set_string(value, m_props.m_device_id.get()); - break; - case PROP_DEVICE_COUNT: - g_value_set_uint(value, m_props.m_device_count.get()); - break; - case PROP_VDEVICE_KEY: - g_value_set_uint(value, m_props.m_vdevice_key.get()); - break; - case PROP_HEF_PATH: - g_value_set_string(value, m_props.m_hef_path.get()); - break; - case PROP_NETWORK_NAME: - g_value_set_string(value, m_props.m_network_name.get()); - break; - case PROP_BATCH_SIZE: - g_value_set_uint(value, m_props.m_batch_size.get()); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - { - guint outputs_min_pool_size; - g_object_get(m_hailorecv, "outputs-min-pool-size", &outputs_min_pool_size, nullptr); - g_value_set_uint(value, outputs_min_pool_size); - break; + GstHailoTensorMeta *tensor_meta = GST_TENSOR_META_GET(parent_buffer_meta->buffer); + if (!tensor_meta) { + // Not a tensor meta (this buffer is not a tensor), unmap and continue + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + return false; } - case PROP_OUTPUTS_MAX_POOL_SIZE: - { - guint outputs_max_pool_size; - g_object_get(m_hailorecv, "outputs-max-pool-size", &outputs_max_pool_size, nullptr); - g_value_set_uint(value, outputs_max_pool_size); - break; + vstream_info = tensor_meta->info; + return true; +} + +static Expected<std::unordered_map<std::string, hailo_dma_buffer_t>> gst_hailonet_read_input_dma_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer) +{ + std::unordered_map<std::string, hailo_dma_buffer_t> input_buffer_metas; + gpointer state = NULL; + GstMeta *meta; + + while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { + GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta *>(meta); + GstMapInfo info; + hailo_vstream_info_t vstream_info; + bool result = set_infos(parent_buffer_meta, vstream_info, info); + if (result) { + CHECK_AS_EXPECTED(gst_is_dmabuf_memory(info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!"); + + int fd = gst_fd_memory_get_fd(info.memory); + CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!"); + + hailo_dma_buffer_t dma_buffer = {fd, info.size}; + input_buffer_metas[vstream_info.name] = dma_buffer; + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + } } - case PROP_IS_ACTIVE: - g_value_set_boolean(value, m_props.m_is_active.get()); - break; - case PROP_SCHEDULING_ALGORITHM: - g_value_set_enum(value, m_props.m_scheduling_algorithm.get()); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - g_value_set_uint(value, m_props.m_scheduler_timeout_ms.get()); - break; - case PROP_SCHEDULER_THRESHOLD: - g_value_set_uint(value, m_props.m_scheduler_threshold.get()); - break; - case PROP_SCHEDULER_PRIORITY: - g_value_set_uint(value, m_props.m_scheduler_priority.get()); - break; - case PROP_MULTI_PROCESS_SERVICE: - g_value_set_boolean(value, m_props.m_multi_process_service.get()); - break; - case PROP_INPUT_FORMAT_TYPE: - g_value_set_enum(value, m_props.m_input_format_type.get()); - break; - case PROP_OUTPUT_FORMAT_TYPE: - g_value_set_enum(value, m_props.m_output_format_type.get()); - break; - case PROP_NMS_SCORE_THRESHOLD: - g_value_set_float(value, m_props.m_nms_score_threshold.get()); - break; - case PROP_NMS_IOU_THRESHOLD: - g_value_set_float(value, 
m_props.m_nms_iou_threshold.get()); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - g_value_set_uint(value, m_props.m_nms_max_proposals_per_class.get()); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; + CHECK_AS_EXPECTED(!input_buffer_metas.empty(), HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); + + for (auto &input : self->infer_model->inputs()) { + CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), + HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str()); } + + return input_buffer_metas; } -hailo_status HailoNetImpl::set_hef() +static hailo_status gst_hailonet_fill_multiple_input_bindings_dma_buffers(GstHailoNet *self, GstBuffer *buffer) { - m_net_group_handle = make_unique_nothrow<NetworkGroupHandle>(GST_ELEMENT(m_element)); - GST_CHECK(nullptr != m_net_group_handle, HAILO_OUT_OF_HOST_MEMORY, m_element, RESOURCE, "Failed allocating memory for network handle!"); - - hailo_status status = m_net_group_handle->set_hef(m_props.m_device_id.get(), m_props.m_device_count.get(), - m_props.m_vdevice_key.get(), m_props.m_scheduling_algorithm.get(), static_cast<bool>(m_props.m_multi_process_service.get()), - m_props.m_hef_path.get()); - if (HAILO_SUCCESS != status) { - return status; + auto input_buffers = gst_hailonet_read_input_dma_buffers_from_meta(self, buffer); + CHECK_EXPECTED_AS_STATUS(input_buffers); + for (const auto &name : self->infer_model->get_input_names()) + { + auto status = self->infer_bindings.input(name)->set_dma_buffer(input_buffers.value().at(name)); + CHECK_SUCCESS(status); } - if (m_props.m_multi_process_service.get()) { - GST_CHECK(m_props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, - HAILO_INVALID_OPERATION, m_element, RESOURCE, "To use multi-process-service please set scheduling-algorithm."); + return HAILO_SUCCESS; +} + +static Expected<std::unordered_map<std::string, uint8_t *>> gst_hailonet_read_input_buffers_from_meta(GstHailoNet *self, GstBuffer *buffer) +{ + std::unordered_map<std::string, uint8_t *> input_buffer_metas; + gpointer state = NULL; + GstMeta *meta; + + while ((meta = gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { + GstParentBufferMeta *parent_buffer_meta = reinterpret_cast<GstParentBufferMeta *>(meta); + GstMapInfo info; + hailo_vstream_info_t vstream_info; + bool result = set_infos(parent_buffer_meta, vstream_info, info); + if (result) { + input_buffer_metas[vstream_info.name] = static_cast<uint8_t *>(info.data); + gst_buffer_unmap(parent_buffer_meta->buffer, &info); + } } + CHECK_AS_EXPECTED(!input_buffer_metas.empty(), HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); - if (nullptr == m_props.m_network_name.get()) { - // TODO: HRT-4957 - GST_CHECK(m_net_group_handle->hef()->get_network_groups_names().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, - "Network group has to be specified when there is more than one network group in the HEF!"); - auto network_group_name = m_net_group_handle->hef()->get_network_groups_names()[0]; + for (auto &input : self->infer_model->inputs()) { + CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), + HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for input: %s", input.name().c_str()); + } - auto networks_infos = m_net_group_handle->hef()->get_network_infos(network_group_name.c_str()); - GST_CHECK_EXPECTED_AS_STATUS(networks_infos, m_element, RESOURCE, "Getting network infos from network group name has failed, status %d", networks_infos.status()); - 
GST_CHECK(networks_infos.value().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, - "Network has to be specified when there is more than one network in the network group!"); + return input_buffer_metas; +} - std::string default_ng_name = HailoRTDefaults::get_network_name(network_group_name); - m_props.m_network_name = g_strdup(default_ng_name.c_str()); +static hailo_status gst_hailonet_fill_multiple_input_bindings(GstHailoNet *self, GstBuffer *buffer) +{ + auto input_buffers = gst_hailonet_read_input_buffers_from_meta(self, buffer); + CHECK_EXPECTED_AS_STATUS(input_buffers); + for (const auto &name : self->infer_model->get_input_names()) { + auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.value().at(name), + self->infer_model->input(name)->get_frame_size())); + CHECK_SUCCESS(status); } - auto input_vstream_infos = m_net_group_handle->hef()->get_input_vstream_infos(m_props.m_network_name.get()); - GST_CHECK_EXPECTED_AS_STATUS(input_vstream_infos, m_element, RESOURCE, "Getting input vstream infos from HEF has failed, status = %d", - input_vstream_infos.status()); + return HAILO_SUCCESS; +} - // TODO: HRT-4095 - GST_CHECK(1 == input_vstream_infos->size(), HAILO_INVALID_OPERATION, m_element, RESOURCE, "hailonet element supports only HEFs with one input for now!"); +static hailo_status gst_hailonet_push_buffer_to_input_queue(GstHailoNet *self, GstBuffer *buffer) +{ + std::unique_lock<std::mutex> lock(self->input_queue_mutex); + gst_queue_array_push_tail(self->input_queue, buffer); - auto input_vstream_info = input_vstream_infos.value()[0]; - GST_HAILOSEND(m_hailosend)->impl->set_input_vstream_infos(input_vstream_infos.release()); - GST_HAILOSEND(m_hailosend)->impl->set_batch_size(m_props.m_batch_size.get()); + return HAILO_SUCCESS; +}
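For the input-from-meta path, an upstream element is expected to attach each prepared tensor to the video buffer through a GstParentBufferMeta. A hypothetical producer-side sketch (tensor_data, tensor_size, frame_buf and input_vstream_info are illustrative names, not APIs introduced by this patch):

    // Attach a prepared input tensor to the frame buffer that flows into hailonet.
    GstBuffer *tensor_buf = gst_buffer_new_wrapped(tensor_data, tensor_size);
    GstHailoTensorMeta *meta = GST_TENSOR_META_ADD(tensor_buf);
    meta->info = input_vstream_info; // hailo_vstream_info_t whose name matches a network input
    gst_buffer_add_parent_buffer_meta(frame_buf, tensor_buf);
    gst_buffer_unref(tensor_buf); // the parent-buffer meta holds its own reference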
flow status = %d", flow_result); + } - auto frame_size = HailoRTCommon::get_frame_size(input_vstream_info, input_vstream_info.format); - gst_buffer_pool_config_set_params(config, nullptr, frame_size, 1, 1); + GstMapInfo buffer_info; + gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - gboolean result = gst_buffer_pool_set_config(pool, config); - GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set config buffer pool"); + if (gst_hailo_should_use_dma_buffers()) { + CHECK_AS_EXPECTED(gst_is_dmabuf_memory(buffer_info.memory), HAILO_INTERNAL_FAILURE, "GstMemory is not a DMA buf as expected!"); - result = gst_buffer_pool_set_active(pool, TRUE); - GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool active"); + int fd = gst_fd_memory_get_fd(buffer_info.memory); + CHECK_AS_EXPECTED(fd != -1, HAILO_INTERNAL_FAILURE, "Failed to get FD from GstMemory!"); - m_pool = pool; + hailo_dma_buffer_t dma_buffer = {fd, buffer_info.size}; + auto status = self->infer_bindings.output(output.name())->set_dma_buffer(dma_buffer); + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); + CHECK_SUCCESS_AS_EXPECTED(status); + } - return HAILO_SUCCESS; + tensors[output.name()] = {output_buffer, buffer_info}; + } + return tensors; } -hailo_status HailoNetImpl::configure_network_group() +static hailo_status gst_hailonet_fill_single_input_binding(GstHailoNet *self, hailo_pix_buffer_t pix_buffer) { - std::unique_lock lock(m_mutex); - g_object_set(m_queue, "max-size-buffers", MAX_BUFFER_COUNT(m_props.m_batch_size.get()), NULL); + auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer); + CHECK_SUCCESS(status); - auto network_group_name = get_network_group_name(m_props.m_network_name.get()); - GST_CHECK_EXPECTED_AS_STATUS(network_group_name, m_element, RESOURCE, "Could not get network group name from name %s, status = %d", - m_props.m_network_name.get(), network_group_name.status()); + return HAILO_SUCCESS; +} - hailo_status status = m_net_group_handle->configure_network_group(network_group_name->c_str(), m_props.m_scheduling_algorithm.get(), m_props.m_batch_size.get()); - if (HAILO_SUCCESS != status) { - return status; - } - m_was_configured = true; +static hailo_status gst_hailonet_call_run_async(GstHailoNet *self, const std::unordered_map &tensors) +{ + auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); + CHECK_SUCCESS(status); - if (m_props.m_scheduler_timeout_ms.was_changed()) { - status = m_net_group_handle->set_scheduler_timeout(m_props.m_network_name.get(), m_props.m_scheduler_timeout_ms.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler timeout failed, status = %d", status); - } - if (m_props.m_scheduler_threshold.was_changed()) { - status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status); - } - if (m_props.m_scheduler_priority.was_changed()) { - status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); + { + 
std::unique_lock<std::mutex> lock(self->flush_mutex); + self->ongoing_frames++; } - auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, - m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); + auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { + GstBuffer *buffer = nullptr; + { + std::unique_lock<std::mutex> lock(self->input_queue_mutex); + buffer = static_cast<GstBuffer *>(gst_queue_array_pop_head(self->input_queue)); + } - GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", status); + for (auto &output : self->infer_model->outputs()) { + auto info = tensors.at(output.name()); + gst_buffer_unmap(info.buffer, &info.buffer_info); - GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); + GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); + buffer_meta->info = self->output_vstream_infos[output.name()]; - // Check that, if one of the NMS params is changed, we have NMS outputs in the model - auto has_nms_output = std::any_of(vstreams->second.begin(), vstreams->second.end(), [](const auto &vs) - { - return HailoRTCommon::is_nms(vs.get_info()); - }); + (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); + gst_buffer_unref(info.buffer); } - for (auto &out_vs : vstreams->second) { - if (m_props.m_nms_score_threshold.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS score threshold is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_score_threshold(m_props.m_nms_score_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS score threshold failed, status = %d", status); - } - } - if (m_props.m_nms_iou_threshold.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS IoU threshold is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_iou_threshold(m_props.m_nms_iou_threshold.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS IoU threshold failed, status = %d", status); - } + + { + std::unique_lock<std::mutex> lock(self->flush_mutex); + self->ongoing_frames--; } - if (m_props.m_nms_max_proposals_per_class.was_changed()) { - GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS max proposals per class is set, but there is no NMS output in this model."); - if (HailoRTCommon::is_nms(out_vs.get_info())) { - status = out_vs.set_nms_max_proposals_per_class(m_props.m_nms_max_proposals_per_class.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS max proposals per class failed, status = %d", status); - } + self->flush_cv.notify_all(); - } - } - - status = GST_HAILORECV(m_hailorecv)->impl->set_output_vstreams(std::move(vstreams->second), m_props.m_batch_size.get()); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting output vstreams failed, status = %d", status); + gst_hailonet_push_buffer_to_thread(self, buffer); + }); + CHECK_EXPECTED_AS_STATUS(job); + job->detach(); return HAILO_SUCCESS; }
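The round-trip above follows HailoRT's asynchronous inference pattern: wait until the model can accept a job, bind buffers, fire run_async() and let the completion callback hand results downstream. In outline (simplified from the code above; error handling shortened):

    hailo_status infer_once(ConfiguredInferModel &model, ConfiguredInferModel::Bindings &bindings)
    {
        // Block until the model can accept another async job.
        auto status = model.wait_for_async_ready(std::chrono::milliseconds(1000));
        if (HAILO_SUCCESS != status) {
            return status;
        }
        // Completion is reported through the callback; outputs are valid there.
        auto job = model.run_async(bindings, [](const AsyncInferCompletionInfo &info) {
            (void)info; // hand results downstream here
        });
        if (HAILO_SUCCESS != job.status()) {
            return job.status();
        }
        job->detach(); // don't block this thread on completion
        return HAILO_SUCCESS;
    }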
-hailo_status HailoNetImpl::activate_hailonet() +static hailo_status gst_hailonet_async_infer_multi_input(GstHailoNet *self, GstBuffer *buffer) { - if (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get()) { - m_props.m_is_active = true; - return HAILO_SUCCESS; + if (gst_hailo_should_use_dma_buffers()) { + auto status = gst_hailonet_fill_multiple_input_bindings_dma_buffers(self, buffer); + CHECK_SUCCESS(status); + } else { + auto status = gst_hailonet_fill_multiple_input_bindings(self, buffer); + CHECK_SUCCESS(status); } - if ((1 == m_hailonet_count) && (!m_props.m_is_active.was_changed())) { - m_props.m_is_active = true; - } + auto status = gst_hailonet_push_buffer_to_input_queue(self, buffer); + CHECK_SUCCESS(status); - if (m_props.m_is_active.get()) { - std::unique_lock<std::mutex> lock(m_mutex); - hailo_status status = m_net_group_handle->activate_network_group(); - if (HAILO_SUCCESS != status) { - return status; - } + auto tensors = gst_hailonet_fill_output_bindings(self); + if (HAILO_STREAM_ABORT == tensors.status()) { + return HAILO_SUCCESS; } + CHECK_EXPECTED_AS_STATUS(tensors); - m_has_called_activate = true; - + status = gst_hailonet_call_run_async(self, tensors.value()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } -Expected<std::string> HailoNetImpl::get_network_group_name(const std::string &network_name) +static hailo_status gst_hailonet_async_infer_single_input(GstHailoNet *self, GstBuffer *buffer, hailo_pix_buffer_t pix_buffer) { - for (const auto &network_group_name : m_net_group_handle->hef()->get_network_groups_names()) { - // Look for network_group with the given name - if ((network_name == network_group_name) || (network_name == HailoRTDefaults::get_network_name(network_group_name))) { - return std::string(network_group_name); - } + auto status = gst_hailonet_fill_single_input_binding(self, pix_buffer); + CHECK_SUCCESS(status); - auto network_infos = m_net_group_handle->hef()->get_network_infos(network_group_name); - GST_CHECK_EXPECTED(network_infos, m_element, RESOURCE, "Could not get network infos of group %s, status = %d", network_group_name.c_str(), - network_infos.status()); + status = gst_hailonet_push_buffer_to_input_queue(self, buffer); + CHECK_SUCCESS(status); - // Look for network with the given name - for (const auto &network_info : network_infos.value()) { - if (network_name == network_info.name) { - return std::string(network_group_name); - } - } + auto tensors = gst_hailonet_fill_output_bindings(self); + if (HAILO_STREAM_ABORT == tensors.status()) { + return HAILO_SUCCESS; } + CHECK_EXPECTED_AS_STATUS(tensors); - GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Failed to get network group name from the name %s!", network_name.c_str()), (NULL)); - return make_unexpected(HAILO_NOT_FOUND); -} - -hailo_status HailoNetImpl::link_elements() -{ - /* Link elements here because only here we have the HEF and the Caps format */ - if (!gst_element_link_many(m_hailosend, m_queue, m_hailorecv, NULL)) { - GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Could not link elements in bin!"), (NULL)); - return HAILO_INTERNAL_FAILURE; - } + status = gst_hailonet_call_run_async(self, tensors.value()); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } -hailo_status HailoNetImpl::abort_streams() +static Expected<hailo_pix_buffer_t> gst_hailonet_construct_pix_buffer(GstHailoNet *self, GstBuffer *buffer) { - if (!m_props.m_is_active.get()) { - return HAILO_SUCCESS; + GstVideoFrame frame; + auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer, + static_cast<GstMapFlags>(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)); + CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!"); + + hailo_pix_buffer_t pix_buffer = {}; + pix_buffer.index = 0; + pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame.info); + 
pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + + for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { + pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); + pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index); } - auto status = GST_HAILOSEND(m_hailosend)->impl->abort_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting input VStreams of hailosend, status = %d", status); - status = GST_HAILORECV(m_hailorecv)->impl->abort_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting output VStreams of hailorecv, status = %d", status); - return HAILO_SUCCESS; + gst_video_frame_unmap(&frame); + return pix_buffer; } -hailo_status HailoNetImpl::deactivate_network_group() +static GstFlowReturn gst_hailonet_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer) { - auto was_deactivated = m_net_group_handle->remove_network_group(); - GST_CHECK_EXPECTED_AS_STATUS(was_deactivated, m_element, RESOURCE, "Failed removing network, status = %d", was_deactivated.status()); + GstHailoNet *self = GST_HAILONET(parent); + std::unique_lock<std::mutex> lock(self->infer_mutex); - if (was_deactivated.value()) { - return clear_vstreams(); + if (self->props.m_pass_through.get() || !self->props.m_is_active.get() || !self->is_configured) { + gst_hailonet_push_buffer_to_thread(self, buffer); + return GST_FLOW_OK; } - return HAILO_SUCCESS; -} -hailo_status HailoNetImpl::clear_vstreams() -{ - if (nullptr != GST_HAILOSEND(m_hailosend)->impl) { - hailo_status status = GST_HAILOSEND(m_hailosend)->impl->clear_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing input VStreams of hailosend, status = %d", status); + if (!gst_buffer_is_writable(buffer)) { + if (self->props.m_should_force_writable.get()) { + buffer = gst_buffer_make_writable(buffer); + if (nullptr == buffer) { + ERROR("Failed to make buffer writable!"); + return GST_FLOW_ERROR; + } + } else { + ERROR("Input buffer is not writable! 
Use the force-writable property to force the buffer to be writable"); + return GST_FLOW_ERROR; + } } - if (nullptr != GST_HAILORECV(m_hailorecv)->impl) { - hailo_status status = GST_HAILORECV(m_hailorecv)->impl->clear_vstreams(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing output VStreams of hailorecv, status = %d", status); + if (self->props.m_input_from_meta.get()) { + auto status = gst_hailonet_async_infer_multi_input(self, buffer); + if (HAILO_SUCCESS != status) { + return GST_FLOW_ERROR; + } + } else { + auto pix_buffer = gst_hailonet_construct_pix_buffer(self, buffer); + if (!pix_buffer) { + return GST_FLOW_ERROR; + } + auto status = gst_hailonet_async_infer_single_input(self, buffer, pix_buffer.value()); + if (HAILO_SUCCESS != status) { + return GST_FLOW_ERROR; + } } - return HAILO_SUCCESS; + return GST_FLOW_OK; } -gboolean HailoNetImpl::src_pad_event(GstEvent *event) +static hailo_status gst_hailonet_init_infer_model(GstHailoNet *self) { - assert(nullptr != event); + auto vdevice_params = HailoRTDefaults::get_vdevice_params(); - auto parsed_event = HailoSetOutputFormatEvent::parse(event); - if (HAILO_SUCCESS != parsed_event.status()) { - return FALSE; + hailo_device_id_t device_id = {0}; + if (self->props.m_device_id.was_changed()) { + auto expected_device_id = HailoRTCommon::to_device_id(self->props.m_device_id.get()); + CHECK_EXPECTED_AS_STATUS(expected_device_id); + device_id = std::move(expected_device_id.release()); + + vdevice_params.device_ids = &device_id; + } + if (self->props.m_device_count.was_changed()) { + vdevice_params.device_count = self->props.m_device_count.get(); + } + std::string key_str; + if (self->props.m_vdevice_group_id.was_changed()) { + vdevice_params.group_id = self->props.m_vdevice_group_id.get(); + } else if (self->props.m_vdevice_key.was_changed()) { + key_str = std::to_string(self->props.m_vdevice_key.get()); + vdevice_params.group_id = key_str.c_str(); // key_str must outlive VDevice::create() below + } + if (self->props.m_scheduling_algorithm.was_changed()) { + vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get(); + } + if (self->props.m_multi_process_service.was_changed()) { + vdevice_params.multi_process_service = self->props.m_multi_process_service.get(); + CHECK(self->props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_INVALID_OPERATION, + "To use multi-process-service please set scheduling-algorithm to a value other than 'none'"); + } + + auto vdevice = VDevice::create(vdevice_params); + CHECK_EXPECTED_AS_STATUS(vdevice); + self->vdevice = std::move(vdevice.release()); + + auto infer_model = self->vdevice->create_infer_model(self->props.m_hef_path.get()); + CHECK_EXPECTED_AS_STATUS(infer_model); + self->infer_model = infer_model.release(); + + if (!(self->props.m_input_from_meta.get())) { + CHECK(self->infer_model->inputs().size() == 1, HAILO_INVALID_OPERATION, + "In case you want to run a multiple-input model, please set the input-from-meta flag."); } - m_output_formats = std::move(parsed_event->formats); - return TRUE; + return HAILO_SUCCESS; } -GstPadProbeReturn HailoNetImpl::sink_probe() +static const gchar *gst_hailonet_get_format_string(const InferModel::InferStream &input) { - hailo_status status = activate_hailonet(); - GST_CHECK(HAILO_SUCCESS == status, GST_PAD_PROBE_REMOVE, m_element, RESOURCE, "Failed activating network, status = %d", status); - return GST_PAD_PROBE_REMOVE; + switch (input.format().order) { + case HAILO_FORMAT_ORDER_RGB4: + case HAILO_FORMAT_ORDER_NHWC: + if (input.shape().features == RGBA_FEATURES_SIZE) { + 
return "RGBA"; + } + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + /* Fallthrough */ + case HAILO_FORMAT_ORDER_NHCW: + case HAILO_FORMAT_ORDER_FCR: + case HAILO_FORMAT_ORDER_F8CR: + if (input.shape().features == GRAY8_FEATURES_SIZE) { + return "GRAY8"; + } + CHECK(RGB_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for RGB format! (features=%d)", input.name().c_str(), RGB_FEATURES_SIZE, + input.shape().features); + return "RGB"; + case HAILO_FORMAT_ORDER_YUY2: + CHECK(YUY2_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for YUY2 format! (features=%d)", input.name().c_str(), YUY2_FEATURES_SIZE, + input.shape().features); + return "YUY2"; + case HAILO_FORMAT_ORDER_NV12: + CHECK(NV12_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV12 format! (features=%d)", input.name().c_str(), NV12_FEATURES_SIZE, + input.shape().features); + return "NV12"; + case HAILO_FORMAT_ORDER_NV21: + CHECK(NV21_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for NV21 format! (features=%d)", input.name().c_str(), NV21_FEATURES_SIZE, + input.shape().features); + return "NV21"; + case HAILO_FORMAT_ORDER_I420: + CHECK(I420_FEATURES_SIZE == input.shape().features, nullptr, + "Features of input %s is not %d for I420 format! (features=%d)", input.name().c_str(), I420_FEATURES_SIZE, + input.shape().features); + return "I420"; + default: + ERROR("Input %s has an unsupported format order! order = %d\n", input.name().c_str(), input.format().order); + return nullptr; + } } -gboolean HailoNetImpl::is_active() +static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order_t order) { - return m_props.m_is_active.get(); + switch (order) { + case HAILO_FORMAT_ORDER_NV12: + case HAILO_FORMAT_ORDER_NV21: + return original_height * 2; + default: + break; + } + return original_height; } -hailo_status HailoNetImpl::flush() +static GstCaps *gst_hailonet_get_caps(GstHailoNet *self) { - GstBuffer *buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + if (self->did_critical_failure_happen) { + // Sometimes gst_hailonet_get_caps will get called again even after a critical failure happened and nullptr was returned + return nullptr; + } - GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); - buffer_meta->flag = BUFFER_FLAG_FLUSH; - GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + if (nullptr == self->vdevice) { + auto status = gst_hailonet_init_infer_model(self); + if (HAILO_SUCCESS != status) { + self->did_critical_failure_happen = true; + return nullptr; + } + } - GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); - flow_result = gst_pad_push(pad, buffer); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + // TODO (HRT-12491): check caps based on incoming metadata + if (self->props.m_input_from_meta.get()) { + GstCaps *new_caps = gst_caps_new_any(); + self->input_caps = new_caps; + return gst_caps_copy(new_caps); + } - hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + auto input = 
-hailo_status HailoNetImpl::flush() +static GstCaps *gst_hailonet_get_caps(GstHailoNet *self) { - GstBuffer *buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + if (self->did_critical_failure_happen) { + // Sometimes gst_hailonet_get_caps will get called again even after a critical failure happened and nullptr was returned + return nullptr; + } - GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); - buffer_meta->flag = BUFFER_FLAG_FLUSH; - GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + if (nullptr == self->vdevice) { + auto status = gst_hailonet_init_infer_model(self); + if (HAILO_SUCCESS != status) { + self->did_critical_failure_happen = true; + return nullptr; + } + } - GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); - flow_result = gst_pad_push(pad, buffer); - GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + // TODO (HRT-12491): check caps based on incoming metadata + if (self->props.m_input_from_meta.get()) { + GstCaps *new_caps = gst_caps_new_any(); + self->input_caps = new_caps; + return gst_caps_copy(new_caps); + } - hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + auto input = self->infer_model->input(); + if (!input) { + ERROR("Getting input has failed with status = %d\n", input.status()); + return nullptr; + } - status = m_was_flushed_event->reset(); - GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed resetting flushed event, status = %d", status); + const gchar *format = gst_hailonet_get_format_string(input.value()); + if (nullptr == format) { + return nullptr; + } - return HAILO_SUCCESS; -} + GstCaps *new_caps = gst_caps_new_simple("video/x-raw", + "format", G_TYPE_STRING, format, + "width", G_TYPE_INT, input->shape().width, + "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order), + nullptr); -hailo_status HailoNetImpl::signal_was_flushed_event() -{ - return m_was_flushed_event->signal(); + if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) { + ERROR("gst_video_info_from_caps failed\n"); + return nullptr; + } + + self->input_caps = new_caps; + return gst_caps_copy(new_caps); } -static void gst_hailonet_init(GstHailoNet *self) +static gboolean gst_hailonet_handle_sink_query(GstPad * pad, GstObject * parent, GstQuery * query) { - if (!do_versions_match(GST_ELEMENT(self))) { - return; + GstHailoNet *self = GST_HAILONET(parent); + switch (GST_QUERY_TYPE (query)) { + case GST_QUERY_CAPS: + { + GstCaps *caps = gst_hailonet_get_caps(self); + if (nullptr == caps) { + return FALSE; + } + gst_query_set_caps_result(query, caps); + gst_caps_unref(caps); + return TRUE; } - - auto hailonet_impl = HailoNetImpl::create(self); - if (!hailonet_impl) { - GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Creating hailonet implementation has failed! status = %d", hailonet_impl.status()), (NULL)); - return; + case GST_QUERY_ALLOCATION: + { + // We implement this to make sure buffers are contiguous in memory + gst_query_add_allocation_meta(query, GST_VIDEO_META_API_TYPE, NULL); + return gst_pad_query_default(pad, parent, query); + } + default: + return gst_pad_query_default(pad, parent, query); } - - self->impl = hailonet_impl.release(); } -static void gst_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +static gboolean gst_hailonet_handle_caps_event(GstHailoNet *self, GstCaps */*caps*/) { - GST_HAILONET(object)->impl->set_property(object, property_id, value, pspec); -} + if (nullptr == self->input_caps) { + return FALSE; + } -static void gst_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) -{ - GST_HAILONET(object)->impl->get_property(object, property_id, value, pspec); -} + GstCaps *caps_result = gst_pad_peer_query_caps(self->srcpad, self->input_caps); + if (gst_caps_is_empty(caps_result)) { + gst_caps_unref(caps_result); + return FALSE; + } -static gboolean gst_hailorecv_src_pad_event(GstPad */*pad*/, GstObject *parent, GstEvent *event) -{ - gboolean result = GST_HAILONET(GST_ELEMENT_PARENT(parent))->impl->src_pad_event(event); - if (result) { + if (gst_caps_is_any(caps_result)) { + gst_caps_unref(caps_result); return TRUE; } - GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST(parent); - return GST_BASE_TRANSFORM_GET_CLASS(trans)->src_event(trans, event); + GstCaps *outcaps = gst_caps_fixate(caps_result); + gboolean res = gst_pad_set_caps(self->srcpad, outcaps); + gst_caps_unref(outcaps); + return res; } -static GstPadProbeReturn gst_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo */*info*/, gpointer /*user_data*/) +static gboolean gst_hailonet_sink_event(GstPad *pad, GstObject *parent, GstEvent *event) { - return 
GST_HAILONET(GST_ELEMENT_PARENT(gst_pad_get_parent(pad)))->impl->sink_probe(); + GstHailoNet *self = GST_HAILONET(parent); + switch (GST_EVENT_TYPE(event)) { + case GST_EVENT_CAPS: + { + GstCaps *caps; + gst_event_parse_caps(event, &caps); + auto result = gst_hailonet_handle_caps_event(self, caps); + gst_event_unref(event); + return result; + } + case GST_EVENT_EOS: + self->has_got_eos = true; + return gst_pad_push_event(self->srcpad, event); + default: + return gst_pad_event_default(pad, parent, event); + } } -static GstStateChangeReturn gst_hailonet_change_state(GstElement *element, GstStateChange transition) +static GstPadProbeReturn gst_hailonet_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data) { - GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet_parent_class)->change_state(element, transition); - if (GST_STATE_CHANGE_FAILURE == ret) { - return ret; - } + GstHailoNet *self = static_cast<GstHailoNet *>(user_data); + std::unique_lock<std::mutex> lock(self->sink_probe_change_state_mutex); - auto &hailonet = GST_HAILONET(element)->impl; - switch (transition) { - case GST_STATE_CHANGE_NULL_TO_READY: - { - hailo_status status = hailonet->link_elements(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Linking elements has failed, status = %d\n", status); - break; + auto status = gst_hailonet_configure(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; } - case GST_STATE_CHANGE_READY_TO_PAUSED: - { - hailo_status status = hailonet->configure_network_group(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Configuring network group failed, status = %d\n", status); - break; + + status = gst_hailonet_allocate_infer_resources(self); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; } - case GST_STATE_CHANGE_PLAYING_TO_PAUSED: - { - hailo_status status = hailonet->abort_streams(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Aborting streams has failed, status = %d\n", status); - break; + + if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) { + self->props.m_is_active = true; + return GST_PAD_PROBE_REMOVE; } - case GST_STATE_CHANGE_READY_TO_NULL: - { - if (HAILO_SCHEDULING_ALGORITHM_NONE == hailonet->get_props().m_scheduling_algorithm.get()) { - auto status = hailonet->deactivate_network_group(); - GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Deactivating network group failed, status = %d\n", status); - } - // Cleanup all of hailonet memory - hailonet.reset(); - break; + if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) { + self->props.m_is_active = true; } - default: - break; + + if (self->props.m_is_active.get()) { + status = self->configured_infer_model->activate(); + if (HAILO_SUCCESS != status) { + return GST_PAD_PROBE_DROP; + } } - return ret; + self->has_called_activate = true; + return GST_PAD_PROBE_REMOVE; } -static void gst_hailonet_flush_callback(GstHailoNet *hailonet, gpointer /*data*/) +static void gst_hailonet_flush_callback(GstHailoNet *self, gpointer /*data*/) { - (void)hailonet->impl->flush(); + std::unique_lock<std::mutex> lock(self->flush_mutex); + self->flush_cv.wait(lock, [self] () { + return 0 == self->ongoing_frames; + }); }
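Emitting the action signal from application code blocks until no frames are in flight, which is how the "flush" described in the element metadata is consumed (usage sketch; hailonet_element is whatever handle the application holds):

    // Block until hailonet has completed all in-flight frames.
    g_signal_emit_by_name(hailonet_element, "flush");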
of %s is overrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + if (!do_versions_match(GST_ELEMENT(self))) { + return; } -} -static void gst_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer /*udata*/) -{ - if (GST_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { - GST_INFO("Inner queue of %s is underrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + self->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink"); + gst_pad_set_chain_function(self->sinkpad, gst_hailonet_chain); + gst_pad_set_query_function(self->sinkpad, gst_hailonet_handle_sink_query); + gst_pad_set_event_function(self->sinkpad, GST_DEBUG_FUNCPTR(gst_hailonet_sink_event)); + gst_element_add_pad(GST_ELEMENT (self), self->sinkpad); + gst_pad_add_probe(self->sinkpad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet_sink_probe), self, nullptr); + + self->srcpad = gst_pad_new_from_static_template(&src_template, "src"); + gst_element_add_pad(GST_ELEMENT (self), self->srcpad); + + self->input_caps = nullptr; + self->input_queue = nullptr; + self->thread_queue = nullptr; + self->is_thread_running = false; + self->has_got_eos = false; + self->buffers_in_thread_queue = 0; + self->props = HailoNetProperties(); + self->vdevice = nullptr; + self->is_configured = false; + self->has_called_activate = false; + self->ongoing_frames = 0; + self->did_critical_failure_happen = false; + + gchar *parent_name = gst_object_get_name(GST_OBJECT(self)); + gchar *name = g_strconcat(parent_name, ":hailo_allocator", NULL); + g_free(parent_name); + + if (gst_hailo_should_use_dma_buffers()) { + self->dma_allocator = gst_dmabuf_allocator_new(); + } else { + self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL)); + gst_object_ref_sink(self->allocator); + g_free(name); } -} \ No newline at end of file + + g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet_flush_callback), nullptr); + + hailonet_count++; +} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp index 8f151958..26244ebb 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -20,114 +20,164 @@ #ifndef _GST_HAILONET_HPP_ #define _GST_HAILONET_HPP_ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wconversion" +#include +#pragma GCC diagnostic pop + +#include +#include + +#include "hailo/infer_model.hpp" #include "common.hpp" -#include "network_group_handle.hpp" -#include "hailo/expected.hpp" -#include "hailo/event.hpp" -#include +#include #include +#include +#include + +using namespace hailort; G_BEGIN_DECLS -#define GST_TYPE_HAILONET (gst_hailonet_get_type()) -#define GST_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET,GstHailoNet)) -#define GST_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET,GstHailoNetClass)) -#define GST_IS_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET)) -#define GST_IS_HAILONET_CLASS(obj) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET)) +#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type()) +#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator)) +#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass)) +#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR)) +#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR)) -class HailoNetImpl; -struct GstHailoNet +#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE) +#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4) + +#define GST_HAILO_USE_DMA_BUFFER_ENV_VAR "GST_HAILO_USE_DMA_BUFFER" + +struct GstHailoAllocator { - GstBin parent; - std::unique_ptr impl; + GstAllocator parent; + std::unordered_map buffers; }; -struct GstHailoNetClass +struct GstHailoAllocatorClass { - GstBinClass parent; + GstAllocatorClass parent; }; +GType gst_hailo_allocator_get_type(void); + struct HailoNetProperties final { public: - HailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), - m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), - m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO), - m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0) - + HailoNetProperties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), + m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false), m_pass_through(false), + m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE), + m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), + m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), + m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), + m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), 
m_input_from_meta(false), + m_no_transform(false), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_should_force_writable(false), + m_vdevice_key(DEFAULT_VDEVICE_KEY) {} - HailoElemProperty m_device_id; + void free_strings() + { + if (m_hef_path.was_changed()) { + g_free(m_hef_path.get()); + } + if (m_device_id.was_changed()) { + g_free(m_device_id.get()); + } + if (m_vdevice_group_id.was_changed()) { + g_free(m_vdevice_group_id.get()); + } + } + HailoElemProperty m_hef_path; - HailoElemProperty m_network_name; // This property can be network group name or a network name HailoElemProperty m_batch_size; - HailoElemProperty m_is_active; + HailoElemProperty m_device_id; HailoElemProperty m_device_count; - HailoElemProperty m_vdevice_key; + HailoElemProperty m_vdevice_group_id; + HailoElemProperty m_is_active; + HailoElemProperty m_pass_through; + HailoElemProperty m_outputs_min_pool_size; + HailoElemProperty m_outputs_max_pool_size; HailoElemProperty m_scheduling_algorithm; HailoElemProperty m_scheduler_timeout_ms; HailoElemProperty m_scheduler_threshold; HailoElemProperty m_scheduler_priority; - HailoElemProperty m_multi_process_service; HailoElemProperty m_input_format_type; HailoElemProperty m_output_format_type; HailoElemProperty m_nms_score_threshold; HailoElemProperty m_nms_iou_threshold; HailoElemProperty m_nms_max_proposals_per_class; -}; + HailoElemProperty m_input_from_meta; + HailoElemProperty m_no_transform; + HailoElemProperty m_multi_process_service; + HailoElemProperty m_should_force_writable; -class HailoNetImpl final -{ -public: - static Expected> create(GstHailoNet *element); - HailoNetImpl(GstHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event); - ~HailoNetImpl(); - - void set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); - void get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); - hailo_status set_hef(); - hailo_status link_elements(); - hailo_status configure_network_group(); - hailo_status activate_hailonet(); - hailo_status abort_streams(); - - gboolean src_pad_event(GstEvent *event); - GstPadProbeReturn sink_probe(); - gboolean is_active(); - hailo_status flush(); - hailo_status signal_was_flushed_event(); - - hailo_status deactivate_network_group(); - HailoNetProperties &get_props() { - return m_props; - } + // Deprecated + HailoElemProperty m_vdevice_key; +}; -private: - void init_ghost_sink(); - void init_ghost_src(); - Expected get_network_group_name(const std::string &network_name); - - hailo_status clear_vstreams(); - - static std::atomic_uint32_t m_hailonet_count; - static std::mutex m_mutex; - GstHailoNet *m_element; - HailoNetProperties m_props; - std::vector m_output_formats; - GstElement *m_hailosend; - GstElement *m_queue; - GstElement *m_hailorecv; - std::unique_ptr m_net_group_handle; - bool m_was_configured; - bool m_has_called_activate; - EventPtr m_was_flushed_event; - GstBufferPool *m_pool; +typedef struct _GstHailoNet { + GstElement element; + GstPad *sinkpad; + GstPad *srcpad; + GstQueueArray *input_queue; + GstQueueArray *thread_queue; + std::atomic_uint32_t buffers_in_thread_queue; + std::thread thread; + HailoNetProperties props; + GstCaps *input_caps; + std::atomic_bool is_thread_running; + std::atomic_bool has_got_eos; + std::mutex sink_probe_change_state_mutex; + bool did_critical_failure_happen; + + std::unique_ptr vdevice; + std::shared_ptr infer_model; + std::shared_ptr configured_infer_model; + 
ConfiguredInferModel::Bindings infer_bindings;
+    bool is_configured;
+    std::mutex infer_mutex;
+
+    bool has_called_activate;
+    std::atomic_uint32_t ongoing_frames;
+    std::condition_variable flush_cv;
+    std::mutex flush_mutex;
+
+    GstVideoInfo input_frame_info;
+
+    GstHailoAllocator *allocator;
+    GstAllocator *dma_allocator;
+    std::unordered_map<std::string, GstBufferPool*> output_buffer_pools;
+    std::unordered_map<std::string, hailo_vstream_info_t> output_vstream_infos;
+
+    std::mutex input_queue_mutex;
+    std::mutex thread_queue_mutex;
+    std::condition_variable thread_cv;
+} GstHailoNet;
+
+typedef struct _GstHailoNetClass {
+    GstElementClass parent_class;
+} GstHailoNetClass;
+
+struct TensorInfo {
+    GstBuffer *buffer;
+    GstMapInfo buffer_info;
+};

-GType gst_hailonet_get_type(void);
+#define GST_TYPE_HAILONET (gst_hailonet_get_type())
+#define GST_HAILONET(obj) \
+    (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET,GstHailoNet))
+#define GST_HAILONET_CLASS(klass) \
+    (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET,GstHailoNetClass))
+#define GST_IS_HAILONET(obj) \
+    (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET))
+#define GST_IS_HAILONET_CLASS(klass) \
+    (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET))
+
+GType gst_hailonet_get_type (void);

 G_END_DECLS

-#endif /* _GST_HAILONET_HPP_ */
+#endif /* _GST_HAILONET_HPP_ */
\ No newline at end of file
diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp
deleted file mode 100644
index 69f87635..00000000
--- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.cpp
+++ /dev/null
@@ -1,1344 +0,0 @@
-/*
- * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt)
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-#include "gsthailonet2.hpp"
-#include "metadata/tensor_meta.hpp"
-#include "hailo/buffer.hpp"
-#include "hailo/hailort_common.hpp"
-#include "hailo/hailort_defaults.hpp"
-
-#include
-#include
-
-#define WAIT_FOR_ASYNC_READY_TIMEOUT (std::chrono::milliseconds(10000))
-#define ERROR(msg, ...)
g_print(msg, ##__VA_ARGS__) - -enum -{ - PROP_0, - PROP_HEF_PATH, - PROP_BATCH_SIZE, - PROP_DEVICE_ID, - PROP_DEVICE_COUNT, - PROP_VDEVICE_GROUP_ID, - PROP_IS_ACTIVE, - PROP_OUTPUTS_MIN_POOL_SIZE, - PROP_OUTPUTS_MAX_POOL_SIZE, - PROP_SCHEDULING_ALGORITHM, - PROP_SCHEDULER_TIMEOUT_MS, - PROP_SCHEDULER_THRESHOLD, - PROP_SCHEDULER_PRIORITY, - PROP_INPUT_FORMAT_TYPE, - PROP_OUTPUT_FORMAT_TYPE, - PROP_NMS_SCORE_THRESHOLD, - PROP_NMS_IOU_THRESHOLD, - PROP_NMS_MAX_PROPOSALS_PER_CLASS, - PROP_INPUT_FROM_META, - PROP_NO_TRANSFORM, - PROP_MULTI_PROCESS_SERVICE, - PROP_PASS_THROUGH, - - // Deprecated - PROP_VDEVICE_KEY, -}; - -static GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); -static GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); - -G_DEFINE_TYPE (GstHailoAllocator, gst_hailo_allocator, GST_TYPE_ALLOCATOR); -G_DEFINE_TYPE (GstHailoNet2, gst_hailonet2, GST_TYPE_ELEMENT); - -static std::atomic_uint32_t hailonet_count(0); - -static GstMemory *gst_hailo_allocator_alloc(GstAllocator* allocator, gsize size, GstAllocationParams* /*params*/) { - GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); - auto buffer = Buffer::create(size, BufferStorageParams::create_dma()); - if (!buffer) { - ERROR("Creating buffer for allocator has failed, status = %d\n", buffer.status()); - return nullptr; - } - - GstMemory *memory = gst_memory_new_wrapped(static_cast(0), buffer->data(), - buffer->size(), 0, buffer->size(), nullptr, nullptr); - if (nullptr == memory) { - ERROR("Creating new GstMemory for allocator has failed!\n"); - return nullptr; - } - - hailo_allocator->buffers[memory] = std::move(buffer.release()); - return memory; -} - -static void gst_hailo_allocator_free(GstAllocator* allocator, GstMemory *mem) { - GstHailoAllocator *hailo_allocator = GST_HAILO_ALLOCATOR(allocator); - hailo_allocator->buffers.erase(mem); -} - -static void gst_hailo_allocator_class_init(GstHailoAllocatorClass* klass) { - GstAllocatorClass* allocator_class = GST_ALLOCATOR_CLASS(klass); - - allocator_class->alloc = gst_hailo_allocator_alloc; - allocator_class->free = gst_hailo_allocator_free; -} - -static void gst_hailo_allocator_init(GstHailoAllocator* allocator) { - allocator->buffers = std::unordered_map(); -} - -static hailo_status gst_hailonet2_deconfigure(GstHailoNet2 *self) -{ - // This will wakeup any blocking calls to deuque - for (auto &name_pool_pair : self->output_buffer_pools) { - gst_buffer_pool_set_flushing(name_pool_pair.second, TRUE); - } - - std::unique_lock lock(self->infer_mutex); - self->configured_infer_model.reset(); - self->is_configured = false; - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_free(GstHailoNet2 *self) -{ - std::unique_lock lock(self->infer_mutex); - self->configured_infer_model.reset(); - self->infer_model.reset(); - self->vdevice.reset(); - - { - std::unique_lock lock(self->thread_queue_mutex); - self->is_thread_running = false; - } - self->thread_cv.notify_all(); - - if (self->thread.joinable()) { - self->thread.join(); - } - - if (nullptr != self->input_queue) { - gst_queue_array_free(self->input_queue); - } - - if (nullptr != self->thread_queue) { - gst_queue_array_free(self->thread_queue); - } - - if (nullptr != self->input_caps) { - gst_caps_unref(self->input_caps); - } - - for (auto &name_pool_pair : self->output_buffer_pools) { - gboolean result = gst_buffer_pool_set_active(name_pool_pair.second, FALSE); - 
CHECK(result, HAILO_INTERNAL_FAILURE, "Could not release buffer pool"); - gst_object_unref(name_pool_pair.second); - } - - gst_object_unref(self->allocator); - - self->props.free_strings(); - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_format_types(GstHailoNet2 *self, std::shared_ptr infer_model) -{ - if (self->props.m_input_format_type.was_changed()) { - for (const auto &input_name : infer_model->get_input_names()) { - auto input = infer_model->input(input_name); - CHECK_EXPECTED_AS_STATUS(input); - - input->set_format_type(self->props.m_input_format_type.get()); - } - } - if (self->props.m_output_format_type.was_changed()) { - for (const auto &output_name : infer_model->get_output_names()) { - auto output = infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output); - - output->set_format_type(self->props.m_output_format_type.get()); - } - } - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_nms_params(GstHailoNet2 *self, std::shared_ptr infer_model) -{ - // Check that if one of the NMS params are changed, we have NMS outputs in the model - auto has_nms_output = std::any_of(infer_model->outputs().begin(), infer_model->outputs().end(), [](const auto &output) - { - return output.is_nms(); - }); - - for (const auto &output_name : infer_model->get_output_names()) { - auto output = infer_model->output(output_name); - CHECK_EXPECTED_AS_STATUS(output); - - if (self->props.m_nms_score_threshold.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS score threshold is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_score_threshold(self->props.m_nms_score_threshold.get()); - } - } - if (self->props.m_nms_iou_threshold.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS IoU threshold is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_iou_threshold(self->props.m_nms_iou_threshold.get()); - } - } - if (self->props.m_nms_max_proposals_per_class.was_changed()) { - CHECK(has_nms_output, HAILO_INVALID_OPERATION, "NMS max proposals per class is set, but there is no NMS output in this model."); - if (output->is_nms()) { - output->set_nms_max_proposals_per_class(self->props.m_nms_max_proposals_per_class.get()); - } - } - } - - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_set_scheduler_params(GstHailoNet2 *self, std::shared_ptr configured_infer_model) -{ - if (self->props.m_scheduler_timeout_ms.was_changed()) { - auto millis = std::chrono::milliseconds(self->props.m_scheduler_timeout_ms.get()); - auto status = configured_infer_model->set_scheduler_timeout(millis); - CHECK_SUCCESS(status, "Setting scheduler timeout failed, status = %d", status); - } - if (self->props.m_scheduler_threshold.was_changed()) { - auto status = configured_infer_model->set_scheduler_threshold(self->props.m_scheduler_threshold.get()); - CHECK_SUCCESS(status, "Setting scheduler threshold failed, status = %d", status); - } - if (self->props.m_scheduler_priority.was_changed()) { - auto status = configured_infer_model->set_scheduler_priority(self->props.m_scheduler_priority.get()); - CHECK_SUCCESS(status, "Setting scheduler priority failed, status = %d", status); - } - - return HAILO_SUCCESS; -} - -static Expected gst_hailonet2_create_buffer_pool(GstHailoNet2 *self, size_t frame_size) -{ - GstBufferPool *pool = gst_buffer_pool_new(); - - GstStructure *config = gst_buffer_pool_get_config(pool); - gst_buffer_pool_config_set_params(config, nullptr, 
static_cast(frame_size), self->props.m_outputs_min_pool_size.get(), - self->props.m_outputs_max_pool_size.get()); - - gst_buffer_pool_config_set_allocator(config, GST_ALLOCATOR(self->allocator), nullptr); - - gboolean result = gst_buffer_pool_set_config(pool, config); - CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set config buffer pool"); - - result = gst_buffer_pool_set_active(pool, TRUE); - CHECK_AS_EXPECTED(result, HAILO_INTERNAL_FAILURE, "Could not set buffer pool as active"); - - return pool; -} - -static hailo_status gst_hailonet2_configure(GstHailoNet2 *self) -{ - if (self->is_configured) { - return HAILO_SUCCESS; - } - - for (auto &name_pool_pair : self->output_buffer_pools) { - gst_buffer_pool_set_flushing(name_pool_pair.second, FALSE); - } - - self->infer_model->set_batch_size(self->props.m_batch_size.get()); - - auto status = gst_hailonet2_set_format_types(self, self->infer_model); - CHECK_SUCCESS(status); - - status = gst_hailonet2_set_nms_params(self, self->infer_model); - CHECK_SUCCESS(status); - - // In RGB formats, Gstreamer is padding each row to 4. - for (const auto &input_name : self->infer_model->get_input_names()) { - if(self->props.m_no_transform.get()) { - // In case transformation is disabled - format order will be the same as we get from the HW (stream info). - auto input_stream_infos = self->infer_model->hef().get_stream_info_by_name(input_name, HAILO_H2D_STREAM); - CHECK_EXPECTED_AS_STATUS(input_stream_infos); - self->infer_model->input(input_name)->set_format_order(input_stream_infos.value().format.order); - } else if (self->infer_model->input(input_name)->format().order == HAILO_FORMAT_ORDER_NHWC) { - self->infer_model->input(input_name)->set_format_order(HAILO_FORMAT_ORDER_RGB4); - } - } - - if (self->props.m_no_transform.get()) { - for (const auto &output_name : self->infer_model->get_output_names()) { - // In case transformation is disabled - format order will be the same as we get from the HW (stream info). 
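For reference, the no-transform handling spelled out in the comment above (and applied to the outputs just below) boils down to one pattern; here it is as a standalone sketch. The helper name keep_hw_format_order is hypothetical, and only HailoRT calls already used in this file are assumed:

static hailo_status keep_hw_format_order(std::shared_ptr<InferModel> infer_model, const std::string &output_name)
{
    // Read the device-to-host (D2H) stream info recorded in the HEF for this output...
    auto stream_info = infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM);
    CHECK_EXPECTED_AS_STATUS(stream_info);
    // ...and pin the output's format order to the HW order, so no host-side transformation runs.
    infer_model->output(output_name)->set_format_order(stream_info.value().format.order);
    return HAILO_SUCCESS;
}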
- auto output_stream_infos = self->infer_model->hef().get_stream_info_by_name(output_name, HAILO_D2H_STREAM); - CHECK_EXPECTED_AS_STATUS(output_stream_infos); - self->infer_model->output(output_name)->set_format_order(output_stream_infos.value().format.order); - } - } - - auto configured_infer_model = self->infer_model->configure(); - CHECK_EXPECTED_AS_STATUS(configured_infer_model); - - auto ptr = make_shared_nothrow(configured_infer_model.release()); - CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); - self->configured_infer_model = ptr; - - status = gst_hailonet2_set_scheduler_params(self, self->configured_infer_model); - CHECK_SUCCESS(status); - - self->is_configured = true; - return HAILO_SUCCESS; -} - -static hailo_status gst_hailonet2_allocate_infer_resources(GstHailoNet2 *self) -{ - auto bindings = self->configured_infer_model->create_bindings(); - CHECK_EXPECTED_AS_STATUS(bindings); - self->infer_bindings = std::move(bindings.release()); - - self->output_buffer_pools = std::unordered_map(); - self->output_vstream_infos = std::unordered_map(); - - auto async_queue_size = self->configured_infer_model->get_async_queue_size(); - CHECK_EXPECTED_AS_STATUS(async_queue_size); - self->input_queue = gst_queue_array_new(static_cast(async_queue_size.value())); - self->thread_queue = gst_queue_array_new(static_cast(async_queue_size.value())); - self->is_thread_running = true; - self->thread = std::thread([self] () { - while (self->is_thread_running) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->thread_queue_mutex); - self->thread_cv.wait(lock, [self] () { - return (self->buffers_in_thread_queue > 0) || !self->is_thread_running; - }); - if (!self->is_thread_running) { - break; - } - - buffer = static_cast(gst_queue_array_pop_head(self->thread_queue)); - self->buffers_in_thread_queue--; - } - self->thread_cv.notify_all(); - if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application - GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); - if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && (!self->has_got_eos)) { - ERROR("gst_pad_push failed with status = %d\n", ret); - break; - } - } - } - }); - - for (auto &output : self->infer_model->outputs()) { - auto buffer_pool = gst_hailonet2_create_buffer_pool(self, output.get_frame_size()); - CHECK_EXPECTED_AS_STATUS(buffer_pool); - - self->output_buffer_pools[output.name()] = buffer_pool.release(); - } - - auto vstream_infos = self->infer_model->hef().get_output_vstream_infos(); - CHECK_EXPECTED_AS_STATUS(vstream_infos); - - for (const auto &vstream_info : vstream_infos.value()) { - self->output_vstream_infos[vstream_info.name] = vstream_info; - } - - return HAILO_SUCCESS; -} - -static GstStateChangeReturn gst_hailonet2_change_state(GstElement *element, GstStateChange transition) -{ - GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_hailonet2_parent_class)->change_state(element, transition); - if (GST_STATE_CHANGE_FAILURE == ret) { - return ret; - } - - GstHailoNet2 *self = GST_HAILONET2(element); - switch (transition) { - case GST_STATE_CHANGE_PAUSED_TO_PLAYING: - { - auto status = gst_hailonet2_configure(self); - if (HAILO_SUCCESS != status) { - return GST_STATE_CHANGE_FAILURE; - } - break; - } - case GST_STATE_CHANGE_PLAYING_TO_PAUSED: - { - auto status = gst_hailonet2_deconfigure(self); - if (HAILO_SUCCESS != status) { - return GST_STATE_CHANGE_FAILURE; - } - break; - } - case GST_STATE_CHANGE_READY_TO_NULL: - { - auto status = gst_hailonet2_free(self); - if (HAILO_SUCCESS != status) { 
- return GST_STATE_CHANGE_FAILURE; - } - break; - } - default: - break; - } - - return ret; -} - -static hailo_status gst_hailonet2_toggle_activation(GstHailoNet2 *self, gboolean old_is_active, gboolean new_is_active) -{ - if (self->props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get())) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - return HAILO_INVALID_OPERATION; - } - - if (self->has_called_activate) { - if (old_is_active && !new_is_active) { - self->configured_infer_model->deactivate(); - } else if (!old_is_active && new_is_active) { - auto status = self->configured_infer_model->activate(); - CHECK_SUCCESS(status); - } else { - g_warning("Trying to change is-active property from %d to %d", old_is_active, new_is_active); - } - } - - self->props.m_is_active = new_is_active; - return HAILO_SUCCESS; -} - -static void gst_hailonet2_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) -{ - GstHailoNet2 *self = GST_HAILONET2(object); - switch (property_id) { - case PROP_HEF_PATH: - if (self->is_configured) { - g_warning("The network was already configured so changing the HEF path will not take place!"); - break; - } - if (nullptr != self->props.m_hef_path.get()) { - g_free(self->props.m_hef_path.get()); - } - self->props.m_hef_path = g_strdup(g_value_get_string(value)); - break; - case PROP_BATCH_SIZE: - if (self->is_configured) { - g_warning("The network was already configured so changing the batch size will not take place!"); - break; - } - self->props.m_batch_size = static_cast(g_value_get_uint(value)); - break; - case PROP_DEVICE_ID: - if (0 != self->props.m_device_count.get()) { - g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", - g_value_get_string(value), self->props.m_device_count.get()); - break; - } - if (self->is_configured) { - g_warning("The network was already configured so changing the device ID will not take place!"); - break; - } - if (nullptr != self->props.m_device_id.get()) { - g_free(self->props.m_device_id.get()); - } - self->props.m_device_id = g_strdup(g_value_get_string(value)); - break; - case PROP_DEVICE_COUNT: - if (nullptr != self->props.m_device_id.get()) { - g_error("device-id and device-count excludes eachother. 
received device-id=%s, device-count=%d", - self->props.m_device_id.get(), g_value_get_uint(value)); - break; - } - if (self->is_configured) { - g_warning("The network was already configured so changing the device count will not take place!"); - break; - } - self->props.m_device_count = static_cast(g_value_get_uint(value)); - break; - case PROP_VDEVICE_GROUP_ID: - if (self->is_configured) { - g_warning("The network was already configured so changing the vdevice group ID will not take place!"); - break; - } - if (nullptr != self->props.m_vdevice_group_id.get()) { - g_free(self->props.m_vdevice_group_id.get()); - } - self->props.m_vdevice_group_id = g_strdup(g_value_get_string(value)); - break; - case PROP_IS_ACTIVE: - (void)gst_hailonet2_toggle_activation(self, self->props.m_is_active.get(), g_value_get_boolean(value)); - break; - case PROP_PASS_THROUGH: - self->props.m_pass_through = g_value_get_boolean(value); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - if (self->is_configured) { - g_warning("The network has already been configured, the output's minimum pool size cannot be changed!"); - break; - } - self->props.m_outputs_min_pool_size = g_value_get_uint(value); - break; - case PROP_OUTPUTS_MAX_POOL_SIZE: - if (self->is_configured) { - g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); - break; - } - self->props.m_outputs_max_pool_size = g_value_get_uint(value); - break; - case PROP_SCHEDULING_ALGORITHM: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); - break; - } - if (self->props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { - g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); - break; - } - self->props.m_scheduling_algorithm = static_cast(g_value_get_enum(value)); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling timeout will not take place!"); - break; - } - self->props.m_scheduler_timeout_ms = g_value_get_uint(value); - break; - case PROP_SCHEDULER_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling threshold will not take place!"); - break; - } - self->props.m_scheduler_threshold = g_value_get_uint(value); - break; - case PROP_SCHEDULER_PRIORITY: - if (self->is_configured) { - g_warning("The network was already configured so changing the scheduling priority will not take place!"); - break; - } - self->props.m_scheduler_priority = static_cast(g_value_get_uint(value)); - break; - case PROP_INPUT_FORMAT_TYPE: - if (self->is_configured) { - g_warning("The network was already configured so changing the format type will not take place!"); - break; - } - self->props.m_input_format_type = static_cast(g_value_get_enum(value)); - break; - case PROP_OUTPUT_FORMAT_TYPE: - if (self->is_configured) { - g_warning("The network was already configured so changing the format type will not take place!"); - break; - } - self->props.m_output_format_type = static_cast(g_value_get_enum(value)); - break; - case PROP_NMS_SCORE_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the score threshold will not take place!"); - break; - } - self->props.m_nms_score_threshold = static_cast(g_value_get_float(value)); - break; - case 
PROP_NMS_IOU_THRESHOLD: - if (self->is_configured) { - g_warning("The network was already configured so changing the IoU threshold will not take place!"); - break; - } - self->props.m_nms_iou_threshold = static_cast(g_value_get_float(value)); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - if (self->is_configured) { - g_warning("The network was already configured so changing the max proposals per class will not take place!"); - break; - } - self->props.m_nms_max_proposals_per_class = static_cast(g_value_get_uint(value)); - break; - case PROP_INPUT_FROM_META: - if (self->is_configured) { - g_warning("The network was already configured so changing the input method will not take place!"); - break; - } - self->props.m_input_from_meta = g_value_get_boolean(value); - break; - case PROP_NO_TRANSFORM: - if (self->is_configured) { - g_warning("The network was already configured so disabling the transformation will not take place!"); - } - self->props.m_no_transform = g_value_get_boolean(value); - break; - case PROP_MULTI_PROCESS_SERVICE: - if (self->is_configured) { - g_warning("The network was already configured so changing the multi-process-service property will not take place!"); - break; - } - self->props.m_multi_process_service = g_value_get_boolean(value); // TODO: do something with this - break; - - // Deprecated - case PROP_VDEVICE_KEY: - if (self->is_configured) { - g_warning("The network was already configured so changing the vdevice key will not take place!"); - break; - } - self->props.m_vdevice_key = static_cast(g_value_get_uint(value)); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; - } -} - -static void gst_hailonet2_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) -{ - GstHailoNet2 *self = GST_HAILONET2(object); - switch (property_id) { - case PROP_HEF_PATH: - g_value_set_string(value, self->props.m_hef_path.get()); - break; - case PROP_BATCH_SIZE: - g_value_set_uint(value, self->props.m_batch_size.get()); - break; - case PROP_DEVICE_ID: - g_value_set_string(value, self->props.m_device_id.get()); - break; - case PROP_DEVICE_COUNT: - g_value_set_uint(value, self->props.m_device_count.get()); - break; - case PROP_VDEVICE_GROUP_ID: - g_value_set_string(value, self->props.m_vdevice_group_id.get()); - break; - case PROP_IS_ACTIVE: - g_value_set_boolean(value, self->props.m_is_active.get()); - break; - case PROP_PASS_THROUGH: - g_value_set_boolean(value, self->props.m_pass_through.get()); - break; - case PROP_OUTPUTS_MIN_POOL_SIZE: - g_value_set_uint(value, self->props.m_outputs_min_pool_size.get()); - break; - case PROP_OUTPUTS_MAX_POOL_SIZE: - g_value_set_uint(value, self->props.m_outputs_max_pool_size.get()); - break; - case PROP_SCHEDULING_ALGORITHM: - g_value_set_enum(value, self->props.m_scheduling_algorithm.get()); - break; - case PROP_SCHEDULER_TIMEOUT_MS: - g_value_set_uint(value, self->props.m_scheduler_timeout_ms.get()); - break; - case PROP_SCHEDULER_THRESHOLD: - g_value_set_uint(value, self->props.m_scheduler_threshold.get()); - break; - case PROP_SCHEDULER_PRIORITY: - g_value_set_uint(value, self->props.m_scheduler_priority.get()); - break; - case PROP_INPUT_FORMAT_TYPE: - g_value_set_enum(value, self->props.m_input_format_type.get()); - break; - case PROP_OUTPUT_FORMAT_TYPE: - g_value_set_enum(value, self->props.m_output_format_type.get()); - break; - case PROP_NMS_SCORE_THRESHOLD: - g_value_set_float(value, self->props.m_nms_score_threshold.get()); - break; - case PROP_NMS_IOU_THRESHOLD: - 
g_value_set_float(value, self->props.m_nms_iou_threshold.get()); - break; - case PROP_NMS_MAX_PROPOSALS_PER_CLASS: - g_value_set_uint(value, self->props.m_nms_max_proposals_per_class.get()); - break; - case PROP_INPUT_FROM_META: - g_value_set_boolean(value, self->props.m_input_from_meta.get()); - break; - case PROP_NO_TRANSFORM: - g_value_set_boolean(value, self->props.m_no_transform.get()); - break; - case PROP_MULTI_PROCESS_SERVICE: - g_value_set_boolean(value, self->props.m_multi_process_service.get()); - break; - - // Deprecated - case PROP_VDEVICE_KEY: - g_value_set_uint(value, self->props.m_vdevice_key.get()); - break; - default: - G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); - break; - } -} - -static void gst_hailonet2_class_init(GstHailoNet2Class *klass) -{ - GObjectClass *gobject_class = G_OBJECT_CLASS(klass); - GstElementClass *element_class = GST_ELEMENT_CLASS(klass); - - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&sink_template)); - gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); - element_class->change_state = gst_hailonet2_change_state; - - gst_element_class_set_static_metadata(element_class, - "hailonet element", "Hailo/Network", - "Configure and Activate Hailo Network. " - "Supports the \"flush\" signal which blocks until there are no buffers currently processesd in the element. " - "When deactivating a hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " - "hailonet, since this operation waits until there are no frames coming in.", - PLUGIN_AUTHOR); - - gobject_class->set_property = gst_hailonet2_set_property; - gobject_class->get_property = gst_hailonet2_get_property; - g_object_class_install_property(gobject_class, PROP_HEF_PATH, - g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", nullptr, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, - g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", - MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, - g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", - 0, std::numeric_limits::max(), MIN_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, - g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", - "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), - MAX_OUTPUTS_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_DEVICE_ID, - g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, - g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. 
Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, - std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_VDEVICE_GROUP_ID, - g_param_spec_string("vdevice-group-id", - "VDevice Group ID to share vdevices across hailonets", - "Used to share VDevices across different hailonet instances", HAILO_DEFAULT_VDEVICE_GROUP_ID, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // TODO (HRT-12306): Change is-active behavior - g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, - g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " - "By default, the hailonet element will not be active unless it is the only one. " - "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_PASS_THROUGH, - g_param_spec_boolean("pass-through", "Is Element pass-through", "Controls whether the element will perform inference or simply pass buffers through. " - "By default, the hailonet element will not be pass-through. " - "Setting this property to true disables inference, regardless of the scheduler settings.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, - g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " - "Gets values from the enum GstHailoSchedulingAlgorithms. " - "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " - "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", - GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, - g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," - " as long as at least one send request has been sent.", - HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, - g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", - HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, - g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. 
" - "Bigger number represent higher priority", - HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, - g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, - g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." - "Gets values from the enum GstHailoFormatType. ", - GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_INPUT_FROM_META, - g_param_spec_boolean("input-from-meta", "Enable input from meta", "Take network input from metadata instead of video frame.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - g_object_class_install_property(gobject_class, PROP_NO_TRANSFORM, - g_param_spec_boolean("no-transform", "Disable transformations", "Format will remain the same as the HW format.", false, - (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, - g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, - g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " - "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", - HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // Deprecated - g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, - g_param_spec_uint("vdevice-key", - "Deprecated: Indicate whether to re-use or re-create vdevice", - "Deprecated: Use vdevice-group-id instead. Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet." 
\ - "if multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", - MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); - - // See information about the "flush" signal in the element description - g_signal_new( - "flush", - GST_TYPE_HAILONET2, - G_SIGNAL_ACTION, - 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 - ); -} - -static void gst_hailonet2_push_buffer_to_thread(GstHailoNet2 *self, GstBuffer *buffer) -{ - { - std::unique_lock lock(self->thread_queue_mutex); - self->thread_cv.wait(lock, [self] () { - return self->buffers_in_thread_queue < self->props.m_outputs_max_pool_size.get(); - }); - gst_queue_array_push_tail(self->thread_queue, buffer); - self->buffers_in_thread_queue++; - } - self->thread_cv.notify_all(); -} - -// TODO (HRT-12490): reduce code duplication with gst_hailonet2_async_infer -static hailo_status gst_hailonet2_async_infer_multi_input(GstHailoNet2 *self, GstBuffer *buffer, const std::unordered_map &input_buffers) -{ - { - std::unique_lock lock(self->input_queue_mutex); - for (auto name : self->infer_model->get_input_names()) - { - auto status = self->infer_bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name), - self->infer_model->input(name)->get_frame_size())); - CHECK_SUCCESS(status); - } - - gst_queue_array_push_tail(self->input_queue, buffer); - } - - struct TensorInfo { - GstBuffer *buffer; - GstMapInfo buffer_info; - }; - std::unordered_map tensors; - for (auto &output : self->infer_model->outputs()) { - GstBuffer *output_buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); - if (GST_FLOW_FLUSHING == flow_result) { - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); - - GstMapInfo buffer_info; - gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); - CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - - auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); - CHECK_SUCCESS(status); - - tensors[output.name()] = {output_buffer, buffer_info}; - } - - auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); - CHECK_SUCCESS(status); - - auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->input_queue_mutex); - buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); - } - - for (auto &output : self->infer_model->outputs()) { - auto info = tensors.at(output.name()); - gst_buffer_unmap(info.buffer, &info.buffer_info); - - GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); - buffer_meta->info = self->output_vstream_infos[output.name()]; - - (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); - gst_buffer_unref(info.buffer); - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames--; - } - self->flush_cv.notify_all(); - - { - std::unique_lock lock(self->thread_queue_mutex); - gst_queue_array_push_tail(self->thread_queue, buffer); - self->buffers_in_thread_queue++; - } - self->thread_cv.notify_all(); - }); - CHECK_EXPECTED_AS_STATUS(job); - job->detach(); - - return HAILO_SUCCESS; -} - -static 
hailo_status gst_hailonet2_async_infer(GstHailoNet2 *self, GstBuffer * buffer, hailo_pix_buffer_t pix_buffer) -{ - { - std::unique_lock lock(self->input_queue_mutex); - auto status = self->infer_bindings.input()->set_pix_buffer(pix_buffer); - CHECK_SUCCESS(status); - - gst_queue_array_push_tail(self->input_queue, buffer); - } - - struct TensorInfo { - GstBuffer *buffer; - GstMapInfo buffer_info; - }; - std::unordered_map tensors; - for (auto &output : self->infer_model->outputs()) { - GstBuffer *output_buffer = nullptr; - GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(self->output_buffer_pools[output.name()], &output_buffer, nullptr); - if (GST_FLOW_FLUSHING == flow_result) { - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, "Acquire buffer failed!"); - - GstMapInfo buffer_info; - gboolean result = gst_buffer_map(output_buffer, &buffer_info, GST_MAP_WRITE); - CHECK(result, HAILO_INTERNAL_FAILURE, "Failed mapping buffer!"); - - auto status = self->infer_bindings.output(output.name())->set_buffer(MemoryView(buffer_info.data, buffer_info.size)); - CHECK_SUCCESS(status); - - tensors[output.name()] = {output_buffer, buffer_info}; - } - - auto status = self->configured_infer_model->wait_for_async_ready(WAIT_FOR_ASYNC_READY_TIMEOUT); - CHECK_SUCCESS(status); - - auto job = self->configured_infer_model->run_async(self->infer_bindings, [self, tensors] (const AsyncInferCompletionInfo &/*completion_info*/) { - GstBuffer *buffer = nullptr; - { - std::unique_lock lock(self->input_queue_mutex); - buffer = static_cast(gst_queue_array_pop_head(self->input_queue)); - } - - for (auto &output : self->infer_model->outputs()) { - auto info = tensors.at(output.name()); - gst_buffer_unmap(info.buffer, &info.buffer_info); - - GstHailoTensorMeta *buffer_meta = GST_TENSOR_META_ADD(info.buffer); - buffer_meta->info = self->output_vstream_infos[output.name()]; - - (void)gst_buffer_add_parent_buffer_meta(buffer, info.buffer); - gst_buffer_unref(info.buffer); - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames--; - } - self->flush_cv.notify_all(); - - gst_hailonet2_push_buffer_to_thread(self, buffer); - }); - CHECK_EXPECTED_AS_STATUS(job); - job->detach(); - - return HAILO_SUCCESS; -} - -static Expected gst_hailonet2_construct_pix_buffer(GstHailoNet2 *self, GstBuffer *buffer) -{ - GstVideoFrame frame; - auto result = gst_video_frame_map(&frame, &self->input_frame_info, buffer, - static_cast(GST_MAP_READ | GST_VIDEO_FRAME_MAP_FLAG_NO_REF)); - CHECK_AS_EXPECTED(result,HAILO_INTERNAL_FAILURE, "gst_video_frame_map failed!"); - - hailo_pix_buffer_t pix_buffer = {}; - pix_buffer.index = 0; - pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame.info); - - for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { - pix_buffer.planes[plane_index].bytes_used = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); - pix_buffer.planes[plane_index].plane_size = GST_VIDEO_INFO_PLANE_STRIDE(&frame.info, plane_index) * GST_VIDEO_INFO_COMP_HEIGHT(&frame.info, plane_index); - pix_buffer.planes[plane_index].user_ptr = GST_VIDEO_FRAME_PLANE_DATA(&frame, plane_index); - } - - gst_video_frame_unmap(&frame); - return pix_buffer; -} - -static Expected> gst_hailonet2_read_input_buffers_from_meta(GstHailoNet2 *self, GstBuffer *buffer) -{ - std::unordered_map input_buffer_metas; - gpointer state = NULL; - GstMeta *meta; - - while ((meta = 
gst_buffer_iterate_meta_filtered(buffer, &state, GST_PARENT_BUFFER_META_API_TYPE))) { - GstParentBufferMeta *parent_buffer_meta = reinterpret_cast(meta); - GstMapInfo info; - gboolean map_succeeded = gst_buffer_map(parent_buffer_meta->buffer, &info, GST_MAP_READ); - if (!map_succeeded) { - // Failed to map, this buffer might not have a GstHailoTensorMeta, continue - continue; - } - GstHailoTensorMeta *tensor_meta = GST_TENSOR_META_GET(parent_buffer_meta->buffer); - if (!tensor_meta) { - // Not a tensor meta (this buffer is not a tensor), unmap and continue - gst_buffer_unmap(parent_buffer_meta->buffer, &info); - continue; - } - const hailo_vstream_info_t vstream_info = tensor_meta->info; - input_buffer_metas[vstream_info.name] = static_cast(info.data); - gst_buffer_unmap(parent_buffer_meta->buffer, &info); - } - CHECK_AS_EXPECTED(!input_buffer_metas.empty(),HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer!"); - - for (auto &input : self->infer_model->inputs()) { - CHECK_AS_EXPECTED(input_buffer_metas.find(input.name()) != input_buffer_metas.end(), - HAILO_INTERNAL_FAILURE, "No GstHailoTensorMeta was found in buffer for output: %s", input.name().c_str()); - } - - return input_buffer_metas; -} - -static GstFlowReturn gst_hailonet2_chain(GstPad * /*pad*/, GstObject * parent, GstBuffer * buffer) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - std::unique_lock lock(self->infer_mutex); - - if (self->props.m_pass_through.get() || !self->props.m_is_active.get()) { - gst_hailonet2_push_buffer_to_thread(self, buffer); - return GST_FLOW_OK; - } - - { - std::unique_lock lock(self->flush_mutex); - self->ongoing_frames++; - } - - if (self->props.m_input_from_meta.get()) { - auto input_buffer_metas = gst_hailonet2_read_input_buffers_from_meta(self, buffer); - if (!input_buffer_metas) { - return GST_FLOW_ERROR; - } - auto status = gst_hailonet2_async_infer_multi_input(self, buffer, input_buffer_metas.value()); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { - return GST_FLOW_ERROR; - } - } else { - auto pix_buffer = gst_hailonet2_construct_pix_buffer(self, buffer); - if (!pix_buffer) { - return GST_FLOW_ERROR; - } - auto status = gst_hailonet2_async_infer(self, buffer, pix_buffer.value()); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { - return GST_FLOW_ERROR; - } - } - - return GST_FLOW_OK; -} - -static hailo_status gst_hailonet2_init_infer_model(GstHailoNet2 * self) -{ - auto vdevice_params = HailoRTDefaults::get_vdevice_params(); - - hailo_device_id_t device_id = {0}; - if (self->props.m_device_id.was_changed()) { - auto expected_device_id = HailoRTCommon::to_device_id(self->props.m_device_id.get()); - CHECK_EXPECTED_AS_STATUS(expected_device_id); - device_id = std::move(expected_device_id.release()); - - vdevice_params.device_ids = &device_id; - } - if (self->props.m_device_count.was_changed()) { - vdevice_params.device_count = self->props.m_device_count.get(); - } - if (self->props.m_vdevice_group_id.was_changed()) { - vdevice_params.group_id = self->props.m_vdevice_group_id.get(); - } else if (self->props.m_vdevice_key.was_changed()) { - auto key_str = std::to_string(self->props.m_vdevice_key.get()); - vdevice_params.group_id = key_str.c_str(); - } - if (self->props.m_scheduling_algorithm.was_changed()) { - vdevice_params.scheduling_algorithm = self->props.m_scheduling_algorithm.get(); - } - - auto vdevice = VDevice::create(vdevice_params); - CHECK_EXPECTED_AS_STATUS(vdevice); - self->vdevice = 
std::move(vdevice.release()); - - auto infer_model = self->vdevice->create_infer_model(self->props.m_hef_path.get()); - CHECK_EXPECTED_AS_STATUS(infer_model); - self->infer_model = infer_model.release(); - - return HAILO_SUCCESS; -} - -static const gchar *gst_hailonet2_get_format_string(const InferModel::InferStream &input) -{ - switch (input.format().order) { - case HAILO_FORMAT_ORDER_RGB4: - case HAILO_FORMAT_ORDER_NHWC: - if (input.shape().features == RGBA_FEATURES_SIZE) { - return "RGBA"; - } - if (input.shape().features == GRAY8_FEATURES_SIZE) { - return "GRAY8"; - } - /* Fallthrough */ - case HAILO_FORMAT_ORDER_NHCW: - case HAILO_FORMAT_ORDER_FCR: - case HAILO_FORMAT_ORDER_F8CR: - if (input.shape().features == GRAY8_FEATURES_SIZE) { - return "GRAY8"; - } - CHECK(RGB_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for RGB format! (features=%d)", input.name().c_str(), RGB_FEATURES_SIZE, - input.shape().features); - return "RGB"; - case HAILO_FORMAT_ORDER_YUY2: - CHECK(YUY2_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for YUY2 format! (features=%d)", input.name().c_str(), YUY2_FEATURES_SIZE, - input.shape().features); - return "YUY2"; - case HAILO_FORMAT_ORDER_NV12: - CHECK(NV12_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for NV12 format! (features=%d)", input.name().c_str(), NV12_FEATURES_SIZE, - input.shape().features); - return "NV12"; - case HAILO_FORMAT_ORDER_NV21: - CHECK(NV21_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for NV21 format! (features=%d)", input.name().c_str(), NV21_FEATURES_SIZE, - input.shape().features); - return "NV21"; - case HAILO_FORMAT_ORDER_I420: - CHECK(I420_FEATURES_SIZE == input.shape().features, nullptr, - "Features of input %s is not %d for I420 format! (features=%d)", input.name().c_str(), I420_FEATURES_SIZE, - input.shape().features); - return "I420"; - default: - ERROR("Input %s has an unsupported format order! 
order = %d\n", input.name().c_str(), input.format().order); - return nullptr; - } -} - -static uint32_t get_height_by_order(uint32_t original_height, hailo_format_order_t order) -{ - switch (order) { - case HAILO_FORMAT_ORDER_NV12: - case HAILO_FORMAT_ORDER_NV21: - return original_height * 2; - default: - break; - } - return original_height; -} - -static GstCaps *gst_hailonet2_get_caps(GstHailoNet2 *self) -{ - if (nullptr == self->vdevice) { - auto status = gst_hailonet2_init_infer_model(self); - if (HAILO_SUCCESS != status) { - return nullptr; - } - } - - // TODO (HRT-12491): check caps based on incoming metadata - if (self->props.m_input_from_meta.get()) { - GstCaps *new_caps = gst_caps_new_any(); - self->input_caps = new_caps; - return gst_caps_copy(new_caps); - } - - auto input = self->infer_model->input(); - if (!input) { - ERROR("Getting input has failed\n"); - return nullptr; - } - - const gchar *format = gst_hailonet2_get_format_string(input.value()); - if (nullptr == format) { - return nullptr; - } - - GstCaps *new_caps = gst_caps_new_simple("video/x-raw", - "format", G_TYPE_STRING, format, - "width", G_TYPE_INT, input->shape().width, - "height", G_TYPE_INT, get_height_by_order(input->shape().height, input->format().order), - nullptr); - - if (!gst_video_info_from_caps(&self->input_frame_info, new_caps)) { - ERROR("gst_video_info_from_caps failed\n"); - return nullptr; - } - - self->input_caps = new_caps; - return gst_caps_copy(new_caps); -} - -static gboolean gst_hailonet2_handle_sink_query(GstPad * pad, GstObject * parent, GstQuery * query) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - switch (GST_QUERY_TYPE (query)) { - case GST_QUERY_CAPS: - { - GstCaps *caps = gst_hailonet2_get_caps(self); - gst_query_set_caps_result(query, caps); - gst_caps_unref(caps); - return TRUE; - } - case GST_QUERY_ALLOCATION: - { - // We implement this to make sure buffers are contiguous in memory - gst_query_add_allocation_meta(query, GST_VIDEO_META_API_TYPE, NULL); - return gst_pad_query_default(pad, parent, query); - } - default: - return gst_pad_query_default(pad, parent, query); - } -} - -static gboolean gst_hailonet2_handle_caps_event(GstHailoNet2 *self, GstCaps */*caps*/) -{ - if (nullptr == self->input_caps) { - return FALSE; - } - - GstCaps *caps_result = gst_pad_peer_query_caps(self->srcpad, self->input_caps); - if (gst_caps_is_empty(caps_result)) { - return FALSE; - } - - if (gst_caps_is_any(caps_result)) { - gst_caps_unref(caps_result); - return TRUE; - } - - GstCaps *outcaps = gst_caps_fixate(caps_result); - gboolean res = gst_pad_set_caps(self->srcpad, outcaps); - gst_caps_unref(outcaps); - return res; -} - -static gboolean gst_hailonet2_sink_event(GstPad *pad, GstObject *parent, GstEvent *event) -{ - GstHailoNet2 *self = GST_HAILONET2(parent); - switch (GST_EVENT_TYPE(event)) { - case GST_EVENT_CAPS: - { - GstCaps *caps; - gst_event_parse_caps(event, &caps); - auto result = gst_hailonet2_handle_caps_event(self, caps); - gst_event_unref(event); - return result; - } - case GST_EVENT_EOS: - self->has_got_eos = true; - return gst_pad_push_event(self->srcpad, event); - default: - return gst_pad_event_default(pad, parent, event); - } -} - -static GstPadProbeReturn gst_hailonet2_sink_probe(GstPad */*pad*/, GstPadProbeInfo */*info*/, gpointer user_data) -{ - GstHailoNet2 *self = static_cast(user_data); - auto status = gst_hailonet2_configure(self); - if (HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - - status = gst_hailonet2_allocate_infer_resources(self); - if 
(HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - - if (HAILO_SCHEDULING_ALGORITHM_NONE != self->props.m_scheduling_algorithm.get()) { - self->props.m_is_active = true; - return GST_PAD_PROBE_REMOVE; - } - - if ((1 == hailonet_count) && (!self->props.m_is_active.was_changed())) { - self->props.m_is_active = true; - } - - if (self->props.m_is_active.get()) { - status = self->configured_infer_model->activate(); - if (HAILO_SUCCESS != status) { - return GST_PAD_PROBE_DROP; - } - } - - self->has_called_activate = true; - return GST_PAD_PROBE_REMOVE; -} - -static void gst_hailonet2_flush_callback(GstHailoNet2 *self, gpointer /*data*/) -{ - std::unique_lock lock(self->flush_mutex); - self->flush_cv.wait(lock, [self] () { - return 0 == self->ongoing_frames; - }); -} - -static void gst_hailonet2_init(GstHailoNet2 *self) -{ - if (!do_versions_match(GST_ELEMENT(self))) { - return; - } - - self->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink"); - gst_pad_set_chain_function(self->sinkpad, gst_hailonet2_chain); - gst_pad_set_query_function(self->sinkpad, gst_hailonet2_handle_sink_query); - gst_pad_set_event_function(self->sinkpad, GST_DEBUG_FUNCPTR(gst_hailonet2_sink_event)); - gst_element_add_pad(GST_ELEMENT (self), self->sinkpad); - gst_pad_add_probe(self->sinkpad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_hailonet2_sink_probe), self, nullptr); - - self->srcpad = gst_pad_new_from_static_template(&src_template, "src"); - gst_element_add_pad(GST_ELEMENT (self), self->srcpad); - - self->input_caps = nullptr; - self->input_queue = nullptr; - self->thread_queue = nullptr; - self->is_thread_running = false; - self->has_got_eos = false; - self->buffers_in_thread_queue = 0; - self->props = HailoNet2Properties(); - self->vdevice = nullptr; - self->is_configured = false; - self->has_called_activate = false; - self->ongoing_frames = 0; - - gchar *parent_name = gst_object_get_name(GST_OBJECT(self)); - gchar *name = g_strconcat(parent_name, ":hailo_allocator", NULL); - g_free(parent_name); - - self->allocator = GST_HAILO_ALLOCATOR(g_object_new(GST_TYPE_HAILO_ALLOCATOR, "name", name, NULL)); - gst_object_ref_sink(self->allocator); - g_free(name); - - g_signal_connect(self, "flush", G_CALLBACK(gst_hailonet2_flush_callback), nullptr); - - hailonet_count++; -} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp deleted file mode 100644 index 119e7a86..00000000 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet2.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Copyright (c) 2021-2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Library General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Library General Public License for more details. - * - * You should have received a copy of the GNU Library General Public - * License along with this library; if not, write to the - * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, - * Boston, MA 02110-1301, USA. 
- */ -#ifndef _GST_HAILONET2_HPP_ -#define _GST_HAILONET2_HPP_ - -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wconversion" -#include -#pragma GCC diagnostic pop - -#include -#include - -#include "hailo/infer_model.hpp" -#include "common.hpp" - -#include -#include -#include -#include - -using namespace hailort; - -G_BEGIN_DECLS - -#define GST_TYPE_HAILO_ALLOCATOR (gst_hailo_allocator_get_type()) -#define GST_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_CAST ((obj), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocator)) -#define GST_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST ((klass), GST_TYPE_HAILO_ALLOCATOR, GstHailoAllocatorClass)) -#define GST_IS_HAILO_ALLOCATOR(obj) (G_TYPE_CHECK_INSTANCE_TYPE ((obj), GST_TYPE_HAILO_ALLOCATOR)) -#define GST_IS_HAILO_ALLOCATOR_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE ((klass), GST_TYPE_HAILO_ALLOCATOR)) - -#define MIN_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE) -#define MAX_OUTPUTS_POOL_SIZE (MAX_GSTREAMER_BATCH_SIZE * 4) - -struct GstHailoAllocator -{ - GstAllocator parent; - std::unordered_map buffers; -}; - -struct GstHailoAllocatorClass -{ - GstAllocatorClass parent; -}; - -GType gst_hailo_allocator_get_type(void); - -struct HailoNet2Properties final -{ -public: - HailoNet2Properties() : m_hef_path(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), - m_device_id(nullptr), m_device_count(0), m_vdevice_group_id(nullptr), m_is_active(false), m_pass_through(false), - m_outputs_min_pool_size(MIN_OUTPUTS_POOL_SIZE), m_outputs_max_pool_size(MAX_OUTPUTS_POOL_SIZE), - m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN), m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), - m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL), - m_input_format_type(HAILO_FORMAT_TYPE_AUTO), m_output_format_type(HAILO_FORMAT_TYPE_AUTO), - m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0), m_input_from_meta(false), - m_no_transform(false), m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), - m_vdevice_key(DEFAULT_VDEVICE_KEY) - {} - - void free_strings() - { - if (m_hef_path.was_changed()) { - g_free(m_hef_path.get()); - } - if (m_device_id.was_changed()) { - g_free(m_device_id.get()); - } - if (m_vdevice_group_id.was_changed()) { - g_free(m_vdevice_group_id.get()); - } - } - - HailoElemProperty m_hef_path; - HailoElemProperty m_batch_size; - HailoElemProperty m_device_id; - HailoElemProperty m_device_count; - HailoElemProperty m_vdevice_group_id; - HailoElemProperty m_is_active; - HailoElemProperty m_pass_through; - HailoElemProperty m_outputs_min_pool_size; - HailoElemProperty m_outputs_max_pool_size; - HailoElemProperty m_scheduling_algorithm; - HailoElemProperty m_scheduler_timeout_ms; - HailoElemProperty m_scheduler_threshold; - HailoElemProperty m_scheduler_priority; - HailoElemProperty m_input_format_type; - HailoElemProperty m_output_format_type; - HailoElemProperty m_nms_score_threshold; - HailoElemProperty m_nms_iou_threshold; - HailoElemProperty m_nms_max_proposals_per_class; - HailoElemProperty m_input_from_meta; - HailoElemProperty m_no_transform; - HailoElemProperty m_multi_process_service; - - // Deprecated - HailoElemProperty m_vdevice_key; -}; - -typedef struct _GstHailoNet2 { - GstElement element; - GstPad *sinkpad; - GstPad *srcpad; - GstQueueArray *input_queue; - GstQueueArray *thread_queue; - std::atomic_uint32_t buffers_in_thread_queue; - std::thread thread; - HailoNet2Properties props; - GstCaps *input_caps; - 
std::atomic_bool is_thread_running; - std::atomic_bool has_got_eos; - - std::unique_ptr vdevice; - std::shared_ptr infer_model; - std::shared_ptr configured_infer_model; - ConfiguredInferModel::Bindings infer_bindings; - bool is_configured; - std::mutex infer_mutex; - - bool has_called_activate; - std::atomic_uint32_t ongoing_frames; - std::condition_variable flush_cv; - std::mutex flush_mutex; - - GstVideoInfo input_frame_info; - - GstHailoAllocator *allocator; - std::unordered_map output_buffer_pools; - std::unordered_map output_vstream_infos; - - std::mutex input_queue_mutex; - std::mutex thread_queue_mutex; - std::condition_variable thread_cv; -} GstHailoNet2; - -typedef struct _GstHailoNet2Class { - GstElementClass parent_class; -} GstHailoNet2Class; - -#define GST_TYPE_HAILONET2 (gst_hailonet2_get_type()) -#define GST_HAILONET2(obj) \ - (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_HAILONET2,GstHailoNet2)) -#define GST_HAILONET2_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_HAILONET2,GstHailoNet2Class)) -#define GST_IS_HAILONET2(obj) \ - (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_HAILONET2)) -#define GST_IS_HAILONET2_CLASS(klass) \ - (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_HAILONET2)) - -GType gst_hailonet2_get_type (void); - -G_END_DECLS - -#endif /* _GST_HAILONET2_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp index 4ae413ee..bcbc350b 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp @@ -17,10 +17,10 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ +#include "sync_gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gst_hailorecv.hpp" #include "gsthailonet.hpp" -#include "gsthailosend.hpp" -#include "gsthailorecv.hpp" -#include "gsthailonet2.hpp" #include "gsthailodevicestats.hpp" #include "metadata/tensor_meta.hpp" @@ -29,11 +29,11 @@ static gboolean plugin_init(GstPlugin *plugin) (void)gst_tensor_meta_get_info(); (void)gst_tensor_meta_api_get_type(); - return gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET) && + return gst_element_register(plugin, "synchailonet", GST_RANK_PRIMARY, GST_TYPE_SYNC_HAILONET) && gst_element_register(plugin, "hailodevicestats", GST_RANK_PRIMARY, GST_TYPE_HAILODEVICESTATS) && gst_element_register(nullptr, "hailosend", GST_RANK_PRIMARY, GST_TYPE_HAILOSEND) && gst_element_register(nullptr, "hailorecv", GST_RANK_PRIMARY, GST_TYPE_HAILORECV) && - gst_element_register(plugin, "hailonet2", GST_RANK_PRIMARY, GST_TYPE_HAILONET2); + gst_element_register(plugin, "hailonet", GST_RANK_PRIMARY, GST_TYPE_HAILONET); } GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, GST_VERSION_MINOR, hailo, "hailo gstreamer plugin", plugin_init, VERSION, diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp index e2b0d085..f5ab1db3 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp @@ -302,7 +302,7 @@ Expected> NetworkGroupConfigManager::con GST_CHECK_EXPECTED(infos, element, RESOURCE, "Failed getting network infos"); if ((infos.release().size() > 1) || (scheduling_algorithm == HAILO_SCHEDULING_ALGORITHM_NONE)) { // If cng was already configured - // But hailonet is not running all networks in the cng (or if not using scheduler) - + // But sync_hailonet is not running all networks in the cng (or if not using scheduler) - // Do not use multiplexer! return found_cng; } diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp index c9897143..d205814c 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp @@ -30,7 +30,7 @@ using device_id_t = std::string; using network_name_t = std::string; -using hailonet_name_t = std::string; +using sync_hailonet_name_t = std::string; class NetworkGroupConfigManager final { @@ -52,7 +52,7 @@ class NetworkGroupConfigManager final // TODO: change this map to store only the shared network_groups (used by multiple hailonets with the same vdevices) std::unordered_map> m_configured_net_groups; - std::unordered_map> m_configured_networks; + std::unordered_map> m_configured_networks; std::mutex m_mutex; }; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp similarity index 98% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp index 322545a5..d678fa98 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp @@ -17,8 +17,8 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ -#include "gsthailorecv.hpp" -#include "gsthailonet.hpp" +#include "sync_gst_hailorecv.hpp" +#include "sync_gsthailonet.hpp" #include "common.hpp" #include "network_group_handle.hpp" #include "metadata/hailo_buffer_flag_meta.hpp" @@ -182,7 +182,7 @@ GstFlowReturn HailoRecvImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr switch (meta->flag) { case BUFFER_FLAG_FLUSH: { - hailo_status status = GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->signal_was_flushed_event(); + hailo_status status = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->signal_was_flushed_event(); GST_CHECK(HAILO_SUCCESS == status, GST_FLOW_ERROR, m_element, RESOURCE, "Signalling was flushed event has failed, status = %d", status); return GST_BASE_TRANSFORM_FLOW_DROPPED; } @@ -195,7 +195,7 @@ GstFlowReturn HailoRecvImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr } } - if (!GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { + if (!GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { return GST_FLOW_OK; } @@ -235,7 +235,7 @@ hailo_status HailoRecvImpl::read_from_vstreams(bool should_print_latency) GST_DEBUG("%s latency: %f milliseconds", output_info.vstream().name().c_str(), latency.count()); } gst_buffer_unmap(*buffer, &buffer_info); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } GST_CHECK_SUCCESS(status, m_element, STREAM, "Reading from vstream failed, status = %d", status); diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp similarity index 100% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailorecv.hpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp similarity index 97% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp index 1c4f536c..ecc1f122 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp @@ -17,8 +17,8 @@ * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301, USA. 
*/ -#include "gsthailosend.hpp" -#include "gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gsthailonet.hpp" #include "metadata/hailo_buffer_flag_meta.hpp" #include @@ -87,7 +87,7 @@ Expected> HailoSendImpl::create(GstHailoSend *ele return ptr; } -HailoSendImpl::HailoSendImpl(GstHailoSend *element) : m_element(element), m_hailonet(nullptr), m_props(), +HailoSendImpl::HailoSendImpl(GstHailoSend *element) : m_element(element), m_sync_hailonet(nullptr), m_props(), m_batch_size(HAILO_DEFAULT_BATCH_SIZE), m_last_frame_pts(0) { GST_DEBUG_CATEGORY_INIT(gst_hailosend_debug_category, "hailosend", 0, "debug category for hailosend element"); @@ -136,13 +136,14 @@ GstFlowReturn HailoSendImpl::handle_frame(GstVideoFilter */*filter*/, GstVideoFr assert(nullptr != frame); m_last_frame_pts = GST_BUFFER_TIMESTAMP(frame->buffer); - if (!GST_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { + if (!GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element))->impl->is_active()) { GstHailoBufferFlagMeta *meta = GST_HAILO_BUFFER_FLAG_META_ADD(frame->buffer); meta->flag = BUFFER_FLAG_SKIP; return GST_FLOW_OK; } hailo_pix_buffer_t pix_buffer = {}; + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; pix_buffer.index = 0; pix_buffer.number_of_planes = GST_VIDEO_INFO_N_PLANES(&frame->info); for (uint32_t plane_index = 0; plane_index < pix_buffer.number_of_planes; plane_index++) { @@ -174,7 +175,7 @@ hailo_status HailoSendImpl::write_to_vstreams(const hailo_pix_buffer_t &pix_buff { for (auto &in_vstream : m_input_vstreams) { auto status = in_vstream.write(pix_buffer); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } GST_CHECK_SUCCESS(status, m_element, STREAM, "Failed writing to input vstream %s, status = %d", in_vstream.name().c_str(), status); @@ -201,9 +202,9 @@ GstCaps *HailoSendImpl::get_caps(GstBaseTransform */*trans*/, GstPadDirection /* if (0 == m_input_vstream_infos.size()) { // Init here because it is guaranteed that we have a parent element - m_hailonet = GST_HAILONET(GST_ELEMENT_PARENT(m_element)); + m_sync_hailonet = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(m_element)); - hailo_status status = m_hailonet->impl->set_hef(); + hailo_status status = m_sync_hailonet->impl->set_hef(); if (HAILO_SUCCESS != status) { return NULL; } diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp similarity index 97% rename from hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp rename to hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp index 33a4d7a3..9b84c68f 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailosend.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp @@ -22,7 +22,7 @@ #include "common.hpp" #include "network_group_handle.hpp" -#include "gsthailonet.hpp" +#include "sync_gsthailonet.hpp" #include #include @@ -92,7 +92,7 @@ class HailoSendImpl final hailo_status write_to_vstreams(const hailo_pix_buffer_t &pix_buffer); GstHailoSend *m_element; - GstHailoNet *m_hailonet; + GstSyncHailoNet *m_sync_hailonet; HailoSendProperties m_props; std::vector m_input_vstream_infos; uint32_t m_batch_size; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp new file mode 100644 index 00000000..dbfed03c --- /dev/null +++ 
b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp @@ -0,0 +1,992 @@ +/* + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. + */ +#include "sync_gsthailonet.hpp" +#include "sync_gst_hailosend.hpp" +#include "sync_gst_hailorecv.hpp" +#include "hailo_events/hailo_events.hpp" +#include "metadata/hailo_buffer_flag_meta.hpp" +#include "hailo/hailort_common.hpp" +#include "hailo/hailort_defaults.hpp" + +#include +#include + +GST_DEBUG_CATEGORY_STATIC(gst_sync_hailonet_debug_category); +#define GST_CAT_DEFAULT gst_sync_hailonet_debug_category + +constexpr std::chrono::milliseconds WAIT_FOR_FLUSH_TIMEOUT_MS(1000); + +static void gst_sync_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec); +static void gst_sync_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec); +static gboolean gst_hailorecv_src_pad_event(GstPad *pad, GstObject *parent, GstEvent *event); +static GstPadProbeReturn gst_sync_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo *info, gpointer user_data); +static GstStateChangeReturn gst_sync_hailonet_change_state(GstElement *element, GstStateChange transition); +static void gst_sync_hailonet_flush_callback(GstSyncHailoNet *hailonet, gpointer data); +static void gst_sync_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer udata); +static void gst_sync_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer udata); + +enum +{ + PROP_0, + PROP_DEBUG, + PROP_DEVICE_ID, + PROP_HEF_PATH, + PROP_NETWORK_NAME, + PROP_BATCH_SIZE, + PROP_OUTPUTS_MIN_POOL_SIZE, + PROP_OUTPUTS_MAX_POOL_SIZE, + PROP_IS_ACTIVE, + PROP_DEVICE_COUNT, + PROP_VDEVICE_KEY, + PROP_SCHEDULING_ALGORITHM, + PROP_SCHEDULER_TIMEOUT_MS, + PROP_SCHEDULER_THRESHOLD, + PROP_SCHEDULER_PRIORITY, + PROP_MULTI_PROCESS_SERVICE, + PROP_INPUT_FORMAT_TYPE, + PROP_OUTPUT_FORMAT_TYPE, + PROP_NMS_SCORE_THRESHOLD, + PROP_NMS_IOU_THRESHOLD, + PROP_NMS_MAX_PROPOSALS_PER_CLASS, +}; + +G_DEFINE_TYPE(GstSyncHailoNet, gst_sync_hailonet, GST_TYPE_BIN); + +static void gst_sync_hailonet_class_init(GstSyncHailoNetClass *klass) +{ + GObjectClass *gobject_class = G_OBJECT_CLASS(klass); + GstElementClass *element_class = GST_ELEMENT_CLASS(klass); + + GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + gst_element_class_add_pad_template(element_class, gst_static_pad_template_get(&src_template)); + + GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + gst_element_class_add_pad_template(element_class, 
gst_static_pad_template_get(&sink_template)); + + gst_element_class_set_static_metadata(element_class, + "sync hailonet element", "Hailo/Network", + "Configure and Activate Hailo Network. " + "Supports the \"flush\" signal which blocks until there are no buffers currently processed in the element. " + "When deactivating a sync hailonet during runtime (via set_property of \"is-active\" to False), make sure that no frames are being pushed into the " + "hailonet, since this operation waits until there are no frames coming in.", + PLUGIN_AUTHOR); + + element_class->change_state = GST_DEBUG_FUNCPTR(gst_sync_hailonet_change_state); + + gobject_class->set_property = gst_sync_hailonet_set_property; + gobject_class->get_property = gst_sync_hailonet_get_property; + g_object_class_install_property(gobject_class, PROP_DEBUG, + g_param_spec_boolean("debug", "Debug flag", "Should print debug information", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_ID, + g_param_spec_string("device-id", "Device ID", "Device ID ([]::., same as in lspci command). Excludes device-count.", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_DEVICE_COUNT, + g_param_spec_uint("device-count", "Number of devices to use", "Number of physical devices to use. Excludes device-id.", HAILO_DEFAULT_DEVICE_COUNT, + std::numeric_limits::max(), HAILO_DEFAULT_DEVICE_COUNT, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_VDEVICE_KEY, + g_param_spec_uint("vdevice-key", + "Indicate whether to re-use or re-create vdevice", + "Relevant only when 'device-count' is passed. If not passed, the created vdevice will be unique to this hailonet. " \ + "If multiple hailonets share 'vdevice-key' and 'device-count', the created vdevice will be shared between those hailonets", + MIN_VALID_VDEVICE_KEY, std::numeric_limits::max(), MIN_VALID_VDEVICE_KEY, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_HEF_PATH, + g_param_spec_string("hef-path", "HEF Path Location", "Location of the HEF file to read", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NETWORK_NAME, + g_param_spec_string("net-name", "Network Name", + "Configure and run this specific network. 
" + "If not passed, configure and run the default network - ONLY if there is one network in the HEF!", NULL, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_BATCH_SIZE, + g_param_spec_uint("batch-size", "Inference Batch", "How many frame to send in one batch", MIN_GSTREAMER_BATCH_SIZE, MAX_GSTREAMER_BATCH_SIZE, HAILO_DEFAULT_BATCH_SIZE, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MIN_POOL_SIZE, + g_param_spec_uint("outputs-min-pool-size", "Outputs Minimun Pool Size", "The minimum amount of buffers to allocate for each output layer", + 0, std::numeric_limits::max(), DEFAULT_OUTPUTS_MIN_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUTS_MAX_POOL_SIZE, + g_param_spec_uint("outputs-max-pool-size", "Outputs Maximum Pool Size", + "The maximum amount of buffers to allocate for each output layer or 0 for unlimited", 0, std::numeric_limits::max(), + DEFAULT_OUTPUTS_MAX_POOL_SIZE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_IS_ACTIVE, + g_param_spec_boolean("is-active", "Is Network Activated", "Controls whether this element should be active. " + "By default, the hailonet element will not be active unless it is the only one. " + "Setting this property in combination with 'scheduling-algorithm' different than HAILO_SCHEDULING_ALGORITHM_NONE is not supported.", false, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + g_object_class_install_property(gobject_class, PROP_SCHEDULING_ALGORITHM, + g_param_spec_enum("scheduling-algorithm", "Scheduling policy for automatic network group switching", "Controls the Model Scheduler algorithm of HailoRT. " + "Gets values from the enum GstHailoSchedulingAlgorithms. " + "Using Model Scheduler algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE, excludes the property 'is-active'. " + "When using the same VDevice across multiple hailonets, all should have the same 'scheduling-algorithm'. ", + GST_TYPE_SCHEDULING_ALGORITHM, HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_TIMEOUT_MS, + g_param_spec_uint("scheduler-timeout-ms", "Timeout for for scheduler in ms", "The maximum time period that may pass before getting run time from the scheduler," + " as long as at least one send request has been sent.", + HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_THRESHOLD, + g_param_spec_uint("scheduler-threshold", "Frames threshold for scheduler", "The minimum number of send requests required before the hailonet is considered ready to get run time from the scheduler.", + HAILO_DEFAULT_SCHEDULER_THRESHOLD, std::numeric_limits::max(), HAILO_DEFAULT_SCHEDULER_THRESHOLD, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_SCHEDULER_PRIORITY, + g_param_spec_uint("scheduler-priority", "Priority index for scheduler", "When the scheduler will choose the next hailonet to run, higher priority will be prioritized in the selection. 
" + "Bigger number represent higher priority", + HAILO_SCHEDULER_PRIORITY_MIN, HAILO_SCHEDULER_PRIORITY_MAX, HAILO_SCHEDULER_PRIORITY_NORMAL, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_MULTI_PROCESS_SERVICE, + g_param_spec_boolean("multi-process-service", "Should run over HailoRT service", "Controls wether to run HailoRT over its service. " + "To use this property, the service should be active and scheduling-algorithm should be set. Defaults to false.", + HAILO_DEFAULT_MULTI_PROCESS_SERVICE, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_INPUT_FORMAT_TYPE, + g_param_spec_enum("input-format-type", "Input format type", "Input format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_OUTPUT_FORMAT_TYPE, + g_param_spec_enum("output-format-type", "Output format type", "Output format type(auto, float32, uint16, uint8). Default value is auto." + "Gets values from the enum GstHailoFormatType. ", + GST_TYPE_HAILO_FORMAT_TYPE, HAILO_FORMAT_TYPE_AUTO, + (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + g_object_class_install_property(gobject_class, PROP_NMS_SCORE_THRESHOLD, + g_param_spec_float("nms-score-threshold", "NMS score threshold", "Threshold used for filtering out candidates. Any box with score::max(), 0, (GParamFlags)(G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS))); + + // See information about the "flush" signal in the element description + g_signal_new( + "flush", + GST_TYPE_SYNC_HAILONET, + G_SIGNAL_ACTION, + 0, nullptr, nullptr, nullptr, G_TYPE_NONE, 0 + ); +} + +std::string create_name(std::string prefix, uint32_t id) +{ + return prefix + std::to_string(id); +} + +Expected> HailoSyncNetImpl::create(GstSyncHailoNet *element) +{ + if (nullptr == element) { + return make_unexpected(HAILO_INVALID_ARGUMENT); + } + + auto hailosend_name = create_name("hailosend", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *hailosend = gst_element_factory_make("hailosend", hailosend_name.c_str()); + if (nullptr == hailosend) { + GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating hailosend element in bin!"), (NULL)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + g_object_set(hailosend, "qos", FALSE, NULL); + + auto hailoqueue_name = create_name("hailoqueue", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *queue = gst_element_factory_make("queue", hailoqueue_name.c_str()); + if (nullptr == queue) { + GST_ELEMENT_ERROR(element, RESOURCE, FAILED, ("Failed creating queue element in bin!"), (NULL)); + gst_object_unref(hailosend); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + // Passing 0 disables the features here + g_object_set(queue, "max-size-time", (guint64)0, NULL); + g_object_set(queue, "max-size-bytes", (guint)0, NULL); + g_signal_connect(queue, "overrun", G_CALLBACK(gst_sync_hailonet_inner_queue_overrun_callback), nullptr); + g_signal_connect(queue, "underrun", G_CALLBACK(gst_sync_hailonet_inner_queue_underrun_callback), nullptr); + + auto hailorecv_name = create_name("hailorecv", HailoSyncNetImpl::m_sync_hailonet_count); + GstElement *hailorecv = gst_element_factory_make("hailorecv", hailorecv_name.c_str()); + if (nullptr == hailorecv) { + GST_ELEMENT_ERROR(element, RESOURCE, 
FAILED, ("Failed creating hailorecv element in bin!"), (NULL)); + gst_object_unref(hailosend); + gst_object_unref(queue); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + g_object_set(hailorecv, "qos", FALSE, NULL); + + g_signal_connect(element, "flush", G_CALLBACK(gst_sync_hailonet_flush_callback), nullptr); + + auto was_flushed_event = Event::create_shared(Event::State::not_signalled); + GST_CHECK_EXPECTED(was_flushed_event, element, RESOURCE, "Failed allocating memory for event!"); + + auto ptr = make_unique_nothrow(element, hailosend, queue, hailorecv, was_flushed_event.release()); + if (nullptr == ptr) { + return make_unexpected(HAILO_OUT_OF_HOST_MEMORY); + } + + return ptr; +} + +std::atomic_uint32_t HailoSyncNetImpl::m_sync_hailonet_count(0); +std::mutex HailoSyncNetImpl::m_mutex; +HailoSyncNetImpl::HailoSyncNetImpl(GstSyncHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event) : + m_element(element), m_props(), m_output_formats(), m_hailosend(hailosend), m_queue(queue), m_hailorecv(hailorecv), + m_net_group_handle(nullptr), m_was_configured(false), m_has_called_activate(false), + m_was_flushed_event(was_flushed_event), m_pool(nullptr) +{ + GST_DEBUG_CATEGORY_INIT(gst_sync_hailonet_debug_category, "sync hailonet", 0, "debug category for sync hailonet element"); + + /* gst_bin_add_many cannot fail. I use this function because the elements are created here and does not come from the outside so, + * gst_bin_add will not fail */ + gst_bin_add_many(GST_BIN(m_element), m_hailosend, m_queue, m_hailorecv, NULL); + init_ghost_sink(); + init_ghost_src(); + + ++m_sync_hailonet_count; +} + +HailoSyncNetImpl::~HailoSyncNetImpl() +{ + if (nullptr != m_pool) { + (void)gst_buffer_pool_set_active(m_pool, FALSE); + } +} + +void HailoSyncNetImpl::init_ghost_sink() +{ + GstPad *pad = gst_element_get_static_pad(m_hailosend, "sink"); + + GstStaticPadTemplate sink_template = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&sink_template); + + GstPad *ghost_pad = gst_ghost_pad_new_from_template("sink", pad, pad_tmpl); + gst_pad_set_active(ghost_pad, TRUE); + gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + + gst_pad_add_probe(pad, GST_PAD_PROBE_TYPE_BUFFER, static_cast(gst_sync_hailonet_sink_probe), nullptr, nullptr); + + gst_object_unref(pad_tmpl); + gst_object_unref(pad); +} + +void HailoSyncNetImpl::init_ghost_src() +{ + GstPad *pad = gst_element_get_static_pad(m_hailorecv, "src"); + + GstStaticPadTemplate src_template = GST_STATIC_PAD_TEMPLATE("src", GST_PAD_SRC, GST_PAD_ALWAYS, GST_STATIC_CAPS_ANY); + GstPadTemplate *pad_tmpl = gst_static_pad_template_get(&src_template); + + GstPad *ghost_pad = gst_ghost_pad_new_from_template("src", pad, pad_tmpl); + gst_pad_set_active(ghost_pad, TRUE); + gst_element_add_pad(GST_ELEMENT(m_element), ghost_pad); + + gst_pad_set_event_function(pad, gst_hailorecv_src_pad_event); + + gst_object_unref(pad_tmpl); + gst_object_unref(pad); +} + +void HailoSyncNetImpl::set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GST_DEBUG_OBJECT(m_element, "set_property"); + + if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { + g_error("set_property got null parameter!"); + return; + } + + switch (property_id) { + case PROP_DEBUG: + { + gboolean debug = g_value_get_boolean(value); + g_object_set(m_hailosend, "debug", debug, NULL); + g_object_set(m_hailorecv, 
"debug", debug, NULL); + break; + } + case PROP_DEVICE_ID: + if (0 != m_props.m_device_count.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + g_value_get_string(value), m_props.m_device_count.get()); + break; + } + if (m_was_configured) { + g_warning("The network was already configured so changing the device ID will not take place!"); + break; + } + if (nullptr != m_props.m_device_id.get()) { + g_free(m_props.m_device_id.get()); + } + m_props.m_device_id = g_strdup(g_value_get_string(value)); + break; + case PROP_DEVICE_COUNT: + if (nullptr != m_props.m_device_id.get()) { + g_error("device-id and device-count excludes eachother. received device-id=%s, device-count=%d", + m_props.m_device_id.get(), g_value_get_uint(value)); + break; + } + if (m_was_configured) { + g_warning("The network was already configured so changing the device count will not take place!"); + break; + } + m_props.m_device_count = static_cast(g_value_get_uint(value)); + break; + case PROP_VDEVICE_KEY: + if (m_was_configured) { + g_warning("The network was already configured so changing the vdevice key will not take place!"); + break; + } + m_props.m_vdevice_key = static_cast(g_value_get_uint(value)); + break; + case PROP_HEF_PATH: + if (m_was_configured) { + g_warning("The network was already configured so changing the HEF path will not take place!"); + break; + } + if (nullptr != m_props.m_hef_path.get()) { + g_free(m_props.m_hef_path.get()); + } + m_props.m_hef_path = g_strdup(g_value_get_string(value)); + break; + case PROP_NETWORK_NAME: + if (m_was_configured) { + g_warning("The network was already configured so changing the network name will not take place!"); + break; + } + if (nullptr != m_props.m_network_name.get()) { + g_free(m_props.m_network_name.get()); + } + m_props.m_network_name = g_strdup(g_value_get_string(value)); + break; + case PROP_BATCH_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the batch size will not take place!"); + break; + } + m_props.m_batch_size = static_cast(g_value_get_uint(value)); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the outputs minimum pool size will not take place!"); + break; + } + g_object_set(m_hailorecv, "outputs-min-pool-size", g_value_get_uint(value), NULL); + break; + case PROP_OUTPUTS_MAX_POOL_SIZE: + if (m_was_configured) { + g_warning("The network was already configured so changing the outputs maximum pool size will not take place!"); + break; + } + g_object_set(m_hailorecv, "outputs-max-pool-size", g_value_get_uint(value), NULL); + break; + case PROP_IS_ACTIVE: + { + gboolean new_is_active = g_value_get_boolean(value); + + if (m_props.m_scheduling_algorithm.was_changed() && (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get())) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + break; + } + + if (m_has_called_activate) { + if (m_props.m_is_active.get() && !new_is_active) { + // Setting this to false before deactivating to signal hailosend and hailorecv to stop inferring + m_props.m_is_active = false; + hailo_status status = deactivate_network_group(); + if (HAILO_SUCCESS != status) { + g_error("Deactivating network group failed, status = %d", status); + return; + } + } else if (!m_props.m_is_active.get() && new_is_active) { + hailo_status status = 
m_net_group_handle->activate_network_group(); + if (HAILO_SUCCESS != status) { + g_error("Failed activating network group, status = %d", status); + break; + } + m_props.m_is_active = true; + } else { + g_warning("Trying to change is-active property state from %d to %d", m_props.m_is_active.get(), new_is_active); + break; + } + } else { + m_props.m_is_active = new_is_active; + } + break; + } + case PROP_SCHEDULING_ALGORITHM: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling algorithm will not take place!"); + break; + } + if (m_props.m_is_active.was_changed() && (g_value_get_enum(value) != HAILO_SCHEDULING_ALGORITHM_NONE)) { + g_error("scheduling-algorithm different than HAILO_SCHEDULING_ALGORITHM_NONE in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduling_algorithm = static_cast(g_value_get_enum(value)); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling timeout will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-timeout-ms) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_timeout_ms = g_value_get_uint(value); + break; + case PROP_SCHEDULER_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling threshold will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-threshold) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_threshold = g_value_get_uint(value); + break; + case PROP_SCHEDULER_PRIORITY: + if (m_was_configured) { + g_warning("The network was already configured so changing the scheduling priority will not take place!"); + break; + } + if (m_props.m_is_active.was_changed()) { + g_error("scheduler usage (scheduler-priority) in combination with 'is-active' is not supported."); + break; + } + m_props.m_scheduler_priority = static_cast(g_value_get_uint(value)); + break; + case PROP_MULTI_PROCESS_SERVICE: + if (m_was_configured) { + g_warning("The network was already configured so changing the multi-process-service property will not take place!"); + break; + } + m_props.m_multi_process_service = g_value_get_boolean(value); + break; + case PROP_INPUT_FORMAT_TYPE: + if (m_was_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + m_props.m_input_format_type = static_cast(g_value_get_enum(value)); + break; + case PROP_OUTPUT_FORMAT_TYPE: + if (m_was_configured) { + g_warning("The network was already configured so changing the format type will not take place!"); + break; + } + m_props.m_output_format_type = static_cast(g_value_get_enum(value)); + break; + case PROP_NMS_SCORE_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the score threshold will not take place!"); + break; + } + m_props.m_nms_score_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_IOU_THRESHOLD: + if (m_was_configured) { + g_warning("The network was already configured so changing the IoU threshold will not take place!"); + break; + } + m_props.m_nms_iou_threshold = static_cast(g_value_get_float(value)); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + if (m_was_configured) { + g_warning("The network was already configured so changing the max 
proposals per class will not take place!"); + break; + } + m_props.m_nms_max_proposals_per_class = static_cast(g_value_get_uint(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +void HailoSyncNetImpl::get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GST_DEBUG_OBJECT(m_element, "get_property"); + + if ((object == nullptr) || (value == nullptr) || (pspec == nullptr)) { + g_error("get_property got null parameter!"); + return; + } + + switch (property_id) { + case PROP_DEBUG: + { + gboolean debug; + g_object_get(m_hailosend, "debug", &debug, nullptr); + g_value_set_boolean(value, debug); + break; + } + case PROP_DEVICE_ID: + g_value_set_string(value, m_props.m_device_id.get()); + break; + case PROP_DEVICE_COUNT: + g_value_set_uint(value, m_props.m_device_count.get()); + break; + case PROP_VDEVICE_KEY: + g_value_set_uint(value, m_props.m_vdevice_key.get()); + break; + case PROP_HEF_PATH: + g_value_set_string(value, m_props.m_hef_path.get()); + break; + case PROP_NETWORK_NAME: + g_value_set_string(value, m_props.m_network_name.get()); + break; + case PROP_BATCH_SIZE: + g_value_set_uint(value, m_props.m_batch_size.get()); + break; + case PROP_OUTPUTS_MIN_POOL_SIZE: + { + guint outputs_min_pool_size; + g_object_get(m_hailorecv, "outputs-min-pool-size", &outputs_min_pool_size, nullptr); + g_value_set_uint(value, outputs_min_pool_size); + break; + } + case PROP_OUTPUTS_MAX_POOL_SIZE: + { + guint outputs_max_pool_size; + g_object_get(m_hailorecv, "outputs-max-pool-size", &outputs_max_pool_size, nullptr); + g_value_set_uint(value, outputs_max_pool_size); + break; + } + case PROP_IS_ACTIVE: + g_value_set_boolean(value, m_props.m_is_active.get()); + break; + case PROP_SCHEDULING_ALGORITHM: + g_value_set_enum(value, m_props.m_scheduling_algorithm.get()); + break; + case PROP_SCHEDULER_TIMEOUT_MS: + g_value_set_uint(value, m_props.m_scheduler_timeout_ms.get()); + break; + case PROP_SCHEDULER_THRESHOLD: + g_value_set_uint(value, m_props.m_scheduler_threshold.get()); + break; + case PROP_SCHEDULER_PRIORITY: + g_value_set_uint(value, m_props.m_scheduler_priority.get()); + break; + case PROP_MULTI_PROCESS_SERVICE: + g_value_set_boolean(value, m_props.m_multi_process_service.get()); + break; + case PROP_INPUT_FORMAT_TYPE: + g_value_set_enum(value, m_props.m_input_format_type.get()); + break; + case PROP_OUTPUT_FORMAT_TYPE: + g_value_set_enum(value, m_props.m_output_format_type.get()); + break; + case PROP_NMS_SCORE_THRESHOLD: + g_value_set_float(value, m_props.m_nms_score_threshold.get()); + break; + case PROP_NMS_IOU_THRESHOLD: + g_value_set_float(value, m_props.m_nms_iou_threshold.get()); + break; + case PROP_NMS_MAX_PROPOSALS_PER_CLASS: + g_value_set_uint(value, m_props.m_nms_max_proposals_per_class.get()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, property_id, pspec); + break; + } +} + +hailo_status HailoSyncNetImpl::set_hef() +{ + m_net_group_handle = make_unique_nothrow(GST_ELEMENT(m_element)); + GST_CHECK(nullptr != m_net_group_handle, HAILO_OUT_OF_HOST_MEMORY, m_element, RESOURCE, "Failed allocating memory for network handle!"); + + hailo_status status = m_net_group_handle->set_hef(m_props.m_device_id.get(), m_props.m_device_count.get(), + m_props.m_vdevice_key.get(), m_props.m_scheduling_algorithm.get(), static_cast(m_props.m_multi_process_service.get()), + m_props.m_hef_path.get()); + if (HAILO_SUCCESS != status) { + return status; + } + + if 
(m_props.m_multi_process_service.get()) { + GST_CHECK(m_props.m_scheduling_algorithm.get() != HAILO_SCHEDULING_ALGORITHM_NONE, + HAILO_INVALID_OPERATION, m_element, RESOURCE, "To use multi-process-service please set scheduling-algorithm."); + } + + if (nullptr == m_props.m_network_name.get()) { + // TODO: HRT-4957 + GST_CHECK(m_net_group_handle->hef()->get_network_groups_names().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, + "A network group has to be specified when there is more than one network group in the HEF!"); + auto network_group_name = m_net_group_handle->hef()->get_network_groups_names()[0]; + + auto networks_infos = m_net_group_handle->hef()->get_network_infos(network_group_name.c_str()); + GST_CHECK_EXPECTED_AS_STATUS(networks_infos, m_element, RESOURCE, "Getting network infos from network group name failed, status %d", networks_infos.status()); + GST_CHECK(networks_infos.value().size() == 1, HAILO_INVALID_ARGUMENT, m_element, RESOURCE, + "A network has to be specified when there is more than one network in the network group!"); + + std::string default_ng_name = HailoRTDefaults::get_network_name(network_group_name); + m_props.m_network_name = g_strdup(default_ng_name.c_str()); + } + + auto input_vstream_infos = m_net_group_handle->hef()->get_input_vstream_infos(m_props.m_network_name.get()); + GST_CHECK_EXPECTED_AS_STATUS(input_vstream_infos, m_element, RESOURCE, "Getting input vstream infos from HEF has failed, status = %d", + input_vstream_infos.status()); + + // TODO: HRT-4095 + GST_CHECK(1 == input_vstream_infos->size(), HAILO_INVALID_OPERATION, m_element, RESOURCE, "sync hailonet element supports only HEFs with one input for now!"); + + auto input_vstream_info = input_vstream_infos.value()[0]; + GST_HAILOSEND(m_hailosend)->impl->set_input_vstream_infos(input_vstream_infos.release()); + GST_HAILOSEND(m_hailosend)->impl->set_batch_size(m_props.m_batch_size.get()); + + GstBufferPool *pool = gst_buffer_pool_new(); + GstStructure *config = gst_buffer_pool_get_config(pool); + + auto frame_size = HailoRTCommon::get_frame_size(input_vstream_info, input_vstream_info.format); + gst_buffer_pool_config_set_params(config, nullptr, frame_size, 1, 1); + + gboolean result = gst_buffer_pool_set_config(pool, config); + GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool config"); + + result = gst_buffer_pool_set_active(pool, TRUE); + GST_CHECK(result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Could not set buffer pool active"); + + m_pool = pool; + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::configure_network_group() +{ + std::unique_lock lock(m_mutex); + g_object_set(m_queue, "max-size-buffers", MAX_BUFFER_COUNT(m_props.m_batch_size.get()), NULL); + + auto network_group_name = get_network_group_name(m_props.m_network_name.get()); + GST_CHECK_EXPECTED_AS_STATUS(network_group_name, m_element, RESOURCE, "Could not get network group name from name %s, status = %d", + m_props.m_network_name.get(), network_group_name.status()); + + hailo_status status = m_net_group_handle->configure_network_group(network_group_name->c_str(), m_props.m_scheduling_algorithm.get(), m_props.m_batch_size.get()); + if (HAILO_SUCCESS != status) { + return status; + } + m_was_configured = true; + + if (m_props.m_scheduler_timeout_ms.was_changed()) { + status = m_net_group_handle->set_scheduler_timeout(m_props.m_network_name.get(), m_props.m_scheduler_timeout_ms.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler timeout 
failed, status = %d", status); + } + if (m_props.m_scheduler_threshold.was_changed()) { + status = m_net_group_handle->set_scheduler_threshold(m_props.m_network_name.get(), m_props.m_scheduler_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler threshold failed, status = %d", status); + } + if (m_props.m_scheduler_priority.was_changed()) { + status = m_net_group_handle->set_scheduler_priority(m_props.m_network_name.get(), m_props.m_scheduler_priority.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting scheduler priority failed, status = %d", status); + } + + auto vstreams = m_net_group_handle->create_vstreams(m_props.m_network_name.get(), m_props.m_scheduling_algorithm.get(), m_output_formats, + m_props.m_input_format_type.get(), m_props.m_output_format_type.get()); + GST_CHECK_EXPECTED_AS_STATUS(vstreams, m_element, RESOURCE, "Creating vstreams failed, status = %d", vstreams.status()); + + GST_HAILOSEND(m_hailosend)->impl->set_input_vstreams(std::move(vstreams->first)); + + // Check that if one of the NMS params is changed, we have NMS outputs in the model + auto has_nms_output = std::any_of(vstreams->second.begin(), vstreams->second.end(), [](const auto &vs) + { + return HailoRTCommon::is_nms(vs.get_info()); + }); + + for (auto &out_vs : vstreams->second) { + if (m_props.m_nms_score_threshold.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS score threshold is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_score_threshold(m_props.m_nms_score_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS score threshold failed, status = %d", status); + } + } + if (m_props.m_nms_iou_threshold.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS IoU threshold is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_iou_threshold(m_props.m_nms_iou_threshold.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS IoU threshold failed, status = %d", status); + } + } + if (m_props.m_nms_max_proposals_per_class.was_changed()) { + GST_CHECK(has_nms_output, HAILO_INVALID_OPERATION, m_element, RESOURCE, "NMS max proposals per class is set, but there is no NMS output in this model."); + if (HailoRTCommon::is_nms(out_vs.get_info())) { + status = out_vs.set_nms_max_proposals_per_class(m_props.m_nms_max_proposals_per_class.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting NMS max proposals per class failed, status = %d", status); + } + } + } + + status = GST_HAILORECV(m_hailorecv)->impl->set_output_vstreams(std::move(vstreams->second), m_props.m_batch_size.get()); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Setting output vstreams failed, status = %d", status); + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::activate_hailonet() +{ + if (HAILO_SCHEDULING_ALGORITHM_NONE != m_props.m_scheduling_algorithm.get()) { + m_props.m_is_active = true; + return HAILO_SUCCESS; + } + + if ((1 == m_sync_hailonet_count) && (!m_props.m_is_active.was_changed())) { + m_props.m_is_active = true; + } + + if (m_props.m_is_active.get()) { + std::unique_lock lock(m_mutex); + hailo_status status = m_net_group_handle->activate_network_group(); + if (HAILO_SUCCESS != status) { + return status; + } + } + + m_has_called_activate = true; + + return HAILO_SUCCESS; +} + +Expected 
HailoSyncNetImpl::get_network_group_name(const std::string &network_name) +{ + for (const auto &network_group_name : m_net_group_handle->hef()->get_network_groups_names()) { + // Look for network_group with the given name + if ((network_name == network_group_name) || (network_name == HailoRTDefaults::get_network_name(network_group_name))) { + return std::string(network_group_name); + } + + auto network_infos = m_net_group_handle->hef()->get_network_infos(network_group_name); + GST_CHECK_EXPECTED(network_infos, m_element, RESOURCE, "Could not get network infos of group %s, status = %d", network_group_name.c_str(), + network_infos.status()); + + // Look for network with the given name + for (const auto &network_info : network_infos.value()) { + if (network_name == network_info.name) { + return std::string(network_group_name); + } + } + } + + GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Failed to get network group name from the name %s!", network_name.c_str()), (NULL)); + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status HailoSyncNetImpl::link_elements() +{ + /* Link elements here because only here we have the HEF and the Caps format */ + if (!gst_element_link_many(m_hailosend, m_queue, m_hailorecv, NULL)) { + GST_ELEMENT_ERROR(m_element, RESOURCE, FAILED, ("Could not link elements in bin!"), (NULL)); + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::abort_streams() +{ + if (!m_props.m_is_active.get()) { + return HAILO_SUCCESS; + } + + auto status = GST_HAILOSEND(m_hailosend)->impl->abort_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting input VStreams of hailosend, status = %d", status); + status = GST_HAILORECV(m_hailorecv)->impl->abort_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed aborting output VStreams of hailorecv, status = %d", status); + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::deactivate_network_group() +{ + auto was_deactivated = m_net_group_handle->remove_network_group(); + GST_CHECK_EXPECTED_AS_STATUS(was_deactivated, m_element, RESOURCE, "Failed removing network, status = %d", was_deactivated.status()); + + if (was_deactivated.value()) { + return clear_vstreams(); + } + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::clear_vstreams() +{ + if (nullptr != GST_HAILOSEND(m_hailosend)->impl) { + hailo_status status = GST_HAILOSEND(m_hailosend)->impl->clear_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing input VStreams of hailosend, status = %d", status); + } + + if (nullptr != GST_HAILORECV(m_hailorecv)->impl) { + hailo_status status = GST_HAILORECV(m_hailorecv)->impl->clear_vstreams(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed clearing output VStreams of hailorecv, status = %d", status); + } + + return HAILO_SUCCESS; +} + +gboolean HailoSyncNetImpl::src_pad_event(GstEvent *event) +{ + assert(nullptr != event); + + auto parsed_event = HailoSetOutputFormatEvent::parse(event); + if (HAILO_SUCCESS != parsed_event.status()) { + return FALSE; + } + + m_output_formats = std::move(parsed_event->formats); + return TRUE; +} + +GstPadProbeReturn HailoSyncNetImpl::sink_probe() +{ + hailo_status status = activate_hailonet(); + GST_CHECK(HAILO_SUCCESS == status, GST_PAD_PROBE_REMOVE, m_element, RESOURCE, "Failed activating network, status = %d", status); + return GST_PAD_PROBE_REMOVE; +} + +gboolean HailoSyncNetImpl::is_active() +{ + return m_props.m_is_active.get(); +} + +hailo_status 
HailoSyncNetImpl::flush() +{ + GstBuffer *buffer = nullptr; + GstFlowReturn flow_result = gst_buffer_pool_acquire_buffer(m_pool, &buffer, nullptr); + GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Acquire buffer failed!"); + + GstHailoBufferFlagMeta *buffer_meta = GST_HAILO_BUFFER_FLAG_META_ADD(buffer); + buffer_meta->flag = BUFFER_FLAG_FLUSH; + GST_BUFFER_TIMESTAMP(buffer) = GST_HAILOSEND(m_hailosend)->impl->last_frame_pts(); + + GstPad *pad = gst_element_get_static_pad(m_hailosend, "src"); + flow_result = gst_pad_push(pad, buffer); + GST_CHECK(GST_FLOW_OK == flow_result, HAILO_INTERNAL_FAILURE, m_element, RESOURCE, "Pushing buffer to queue has failed!"); + + hailo_status status = m_was_flushed_event->wait(WAIT_FOR_FLUSH_TIMEOUT_MS); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed waiting for flushed event, status = %d", status); + + status = m_was_flushed_event->reset(); + GST_CHECK_SUCCESS(status, m_element, RESOURCE, "Failed resetting flushed event, status = %d", status); + + return HAILO_SUCCESS; +} + +hailo_status HailoSyncNetImpl::signal_was_flushed_event() +{ + return m_was_flushed_event->signal(); +} + +static void gst_sync_hailonet_init(GstSyncHailoNet *self) +{ + if (!do_versions_match(GST_ELEMENT(self))) { + return; + } + + auto sync_hailonet_impl = HailoSyncNetImpl::create(self); + if (!sync_hailonet_impl) { + GST_ELEMENT_ERROR(self, RESOURCE, FAILED, ("Creating sync hailonet implementation has failed! status = %d", sync_hailonet_impl.status()), (NULL)); + return; + } + + self->impl = sync_hailonet_impl.release(); +} + +static void gst_sync_hailonet_set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec) +{ + GST_SYNC_HAILONET(object)->impl->set_property(object, property_id, value, pspec); +} + +static void gst_sync_hailonet_get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec) +{ + GST_SYNC_HAILONET(object)->impl->get_property(object, property_id, value, pspec); +} + +static gboolean gst_hailorecv_src_pad_event(GstPad */*pad*/, GstObject *parent, GstEvent *event) +{ + gboolean result = GST_SYNC_HAILONET(GST_ELEMENT_PARENT(parent))->impl->src_pad_event(event); + if (result) { + return TRUE; + } + + GstBaseTransform *trans = GST_BASE_TRANSFORM_CAST(parent); + return GST_BASE_TRANSFORM_GET_CLASS(trans)->src_event(trans, event); +} + +static GstPadProbeReturn gst_sync_hailonet_sink_probe(GstPad *pad, GstPadProbeInfo */*info*/, gpointer /*user_data*/) +{ + return GST_SYNC_HAILONET(GST_ELEMENT_PARENT(gst_pad_get_parent(pad)))->impl->sink_probe(); +} + +static GstStateChangeReturn gst_sync_hailonet_change_state(GstElement *element, GstStateChange transition) +{ + GstStateChangeReturn ret = GST_ELEMENT_CLASS(gst_sync_hailonet_parent_class)->change_state(element, transition); + if (GST_STATE_CHANGE_FAILURE == ret) { + return ret; + } + + auto &sync_hailonet = GST_SYNC_HAILONET(element)->impl; + switch (transition) { + case GST_STATE_CHANGE_NULL_TO_READY: + { + hailo_status status = sync_hailonet->link_elements(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Linking elements has failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_READY_TO_PAUSED: + { + hailo_status status = sync_hailonet->configure_network_group(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Configuring network group failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_PLAYING_TO_PAUSED: + { + 
hailo_status status = sync_hailonet->abort_streams(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Aborting streams has failed, status = %d\n", status); + break; + } + case GST_STATE_CHANGE_READY_TO_NULL: + { + if (HAILO_SCHEDULING_ALGORITHM_NONE == sync_hailonet->get_props().m_scheduling_algorithm.get()) { + auto status = sync_hailonet->deactivate_network_group(); + GST_CHECK(HAILO_SUCCESS == status, GST_STATE_CHANGE_FAILURE, element, RESOURCE, "Deactivating network group failed, status = %d\n", status); + } + + // Cleanup all of hailonet memory + sync_hailonet.reset(); + break; + } + default: + break; + } + + return ret; +} + +static void gst_sync_hailonet_flush_callback(GstSyncHailoNet *sync_hailonet, gpointer /*data*/) +{ + (void)sync_hailonet->impl->flush(); +} + +static void gst_sync_hailonet_inner_queue_overrun_callback(GstElement *queue, gpointer /*udata*/) +{ + if (GST_SYNC_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { + GST_INFO("Inner queue of %s is overrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + } +} + +static void gst_sync_hailonet_inner_queue_underrun_callback(GstElement *queue, gpointer /*udata*/) +{ + if (GST_SYNC_HAILONET(GST_ELEMENT_PARENT(queue))->impl->is_active()) { + GST_INFO("Inner queue of %s is underrun!", GST_ELEMENT_NAME(GST_ELEMENT_PARENT(queue))); + } +} \ No newline at end of file diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp new file mode 100644 index 00000000..00b26125 --- /dev/null +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2021-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Library General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Library General Public License for more details. + * + * You should have received a copy of the GNU Library General Public + * License along with this library; if not, write to the + * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, + * Boston, MA 02110-1301, USA. 
+ */
+#ifndef _GST_SYNC_HAILONET_HPP_
+#define _GST_SYNC_HAILONET_HPP_
+
+#include "common.hpp"
+#include "network_group_handle.hpp"
+#include "hailo/expected.hpp"
+#include "hailo/event.hpp"
+
+#include <gst/gst.h>
+#include <atomic>
+#include <memory>
+#include <mutex>
+#include <vector>
+
+G_BEGIN_DECLS
+
+#define GST_TYPE_SYNC_HAILONET (gst_sync_hailonet_get_type())
+#define GST_SYNC_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_SYNC_HAILONET,GstSyncHailoNet))
+#define GST_SYNC_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_SYNC_HAILONET,GstSyncHailoNetClass))
+#define GST_IS_SYNC_HAILONET(obj) (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_SYNC_HAILONET))
+#define GST_IS_SYNC_HAILONET_CLASS(klass) (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_SYNC_HAILONET))
+
+class HailoSyncNetImpl;
+struct GstSyncHailoNet
+{
+    GstBin parent;
+    std::unique_ptr<HailoSyncNetImpl> impl;
+};
+
+struct GstSyncHailoNetClass
+{
+    GstBinClass parent;
+};
+
+struct SyncHailoNetProperties final
+{
+public:
+    SyncHailoNetProperties() : m_device_id(nullptr), m_hef_path(nullptr), m_network_name(nullptr), m_batch_size(HAILO_DEFAULT_BATCH_SIZE),
+        m_is_active(false), m_device_count(0), m_vdevice_key(DEFAULT_VDEVICE_KEY), m_scheduling_algorithm(HAILO_SCHEDULING_ALGORITHM_ROUND_ROBIN),
+        m_scheduler_timeout_ms(HAILO_DEFAULT_SCHEDULER_TIMEOUT_MS), m_scheduler_threshold(HAILO_DEFAULT_SCHEDULER_THRESHOLD), m_scheduler_priority(HAILO_SCHEDULER_PRIORITY_NORMAL),
+        m_multi_process_service(HAILO_DEFAULT_MULTI_PROCESS_SERVICE), m_input_format_type(HAILO_FORMAT_TYPE_AUTO),
+        m_output_format_type(HAILO_FORMAT_TYPE_AUTO), m_nms_score_threshold(0), m_nms_iou_threshold(0), m_nms_max_proposals_per_class(0)
+
+    {}
+
+    HailoElemProperty<gchar*> m_device_id;
+    HailoElemProperty<gchar*> m_hef_path;
+    HailoElemProperty<gchar*> m_network_name; // This property can be a network group name or a network name
+    HailoElemProperty<guint16> m_batch_size;
+    HailoElemProperty<gboolean> m_is_active;
+    HailoElemProperty<guint16> m_device_count;
+    HailoElemProperty<guint32> m_vdevice_key;
+    HailoElemProperty<hailo_scheduling_algorithm_t> m_scheduling_algorithm;
+    HailoElemProperty<guint32> m_scheduler_timeout_ms;
+    HailoElemProperty<guint32> m_scheduler_threshold;
+    HailoElemProperty<guint8> m_scheduler_priority;
+    HailoElemProperty<gboolean> m_multi_process_service;
+    HailoElemProperty<hailo_format_type_t> m_input_format_type;
+    HailoElemProperty<hailo_format_type_t> m_output_format_type;
+    HailoElemProperty<gfloat> m_nms_score_threshold;
+    HailoElemProperty<gfloat> m_nms_iou_threshold;
+    HailoElemProperty<guint32> m_nms_max_proposals_per_class;
+};
+
+class HailoSyncNetImpl final
+{
+public:
+    static Expected<std::unique_ptr<HailoSyncNetImpl>> create(GstSyncHailoNet *element);
+    HailoSyncNetImpl(GstSyncHailoNet *element, GstElement *hailosend, GstElement *queue, GstElement *hailorecv, EventPtr was_flushed_event);
+    ~HailoSyncNetImpl();
+
+    void set_property(GObject *object, guint property_id, const GValue *value, GParamSpec *pspec);
+    void get_property(GObject *object, guint property_id, GValue *value, GParamSpec *pspec);
+    hailo_status set_hef();
+    hailo_status link_elements();
+    hailo_status configure_network_group();
+    hailo_status activate_hailonet();
+    hailo_status abort_streams();
+
+    gboolean src_pad_event(GstEvent *event);
+    GstPadProbeReturn sink_probe();
+    gboolean is_active();
+    hailo_status flush();
+    hailo_status signal_was_flushed_event();
+
+    hailo_status deactivate_network_group();
+    SyncHailoNetProperties &get_props() {
+        return m_props;
+    }
+
+private:
+    void init_ghost_sink();
+    void init_ghost_src();
+    Expected<std::string> get_network_group_name(const std::string &network_name);
+
+    hailo_status clear_vstreams();
+
+    static std::atomic_uint32_t m_sync_hailonet_count;
+    static std::mutex m_mutex;
+    GstSyncHailoNet *m_element;
+    SyncHailoNetProperties m_props;
+    std::vector<hailo_format_t> m_output_formats;
+    GstElement *m_hailosend;
+    GstElement *m_queue;
+    GstElement *m_hailorecv;
+    std::unique_ptr<NetworkGroupHandle> m_net_group_handle;
+    bool m_was_configured;
+    bool m_has_called_activate;
+    EventPtr m_was_flushed_event;
+    GstBufferPool *m_pool;
+};
+
+GType gst_sync_hailonet_get_type(void);
+
+G_END_DECLS
+
+#endif /* _GST_SYNC_HAILONET_HPP_ */
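Once the plugin is installed, this element is driven entirely through the properties declared above. The snippet below is illustrative only: it assumes the element is registered under the name `synchailonet`, that the GStreamer property names follow the usual member-to-property mapping (`m_hef_path` → `hef-path`, `m_is_active` → `is-active`), and that `model.hef` is a hypothetical HEF path.

```python
# Illustrative only: element name "synchailonet", property names and "model.hef" are assumptions.
import gi
gi.require_version("Gst", "1.0")
from gi.repository import Gst

Gst.init(None)
pipeline = Gst.parse_launch(
    "videotestsrc num-buffers=100 ! synchailonet hef-path=model.hef is-active=true ! fakesink"
)
pipeline.set_state(Gst.State.PLAYING)
# Block until the stream finishes or errors out, then tear the pipeline down.
pipeline.get_bus().timed_pop_filtered(Gst.CLOCK_TIME_NONE,
                                      Gst.MessageType.EOS | Gst.MessageType.ERROR)
pipeline.set_state(Gst.State.NULL)
```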
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
index 2c3e5475..f0d8ea98 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py
@@ -127,10 +127,8 @@ def _raise_indicative_status_exception(self, libhailort_exception):
         raise HailoRTInvalidFrameException("An invalid frame was received") from libhailort_exception
     if string_error_code == "HAILO_TIMEOUT":
         raise HailoRTTimeout("Received a timeout - hailort has failed because a timeout had occurred") from libhailort_exception
-    if string_error_code == "HAILO_STREAM_ABORTED_BY_HW":
-        raise HailoRTStreamAborted("Stream aborted due to an external event") from libhailort_exception
-    if string_error_code == "HAILO_STREAM_ABORTED_BY_USER":
-        raise HailoRTStreamAbortedByUser("Stream was aborted by user") from libhailort_exception
+    if string_error_code == "HAILO_STREAM_ABORT":
+        raise HailoRTStreamAborted("Stream was aborted") from libhailort_exception
     if string_error_code == "HAILO_INVALID_OPERATION":
         raise HailoRTInvalidOperationException("Invalid operation. See hailort.log for more information") from libhailort_exception
@@ -720,11 +718,10 @@ def get_vstream_names_from_stream_name(self, stream_name):
         return self._configured_network.get_vstream_names_from_stream_name(stream_name)

     def set_scheduler_timeout(self, timeout_ms, network_name=None):
-        """Sets the maximum time period that may pass before getting run time from the scheduler,
-        even without reaching the minimum required send requests (e.g. threshold - see set_scheduler_threshold()),
-        as long as at least one send request has been sent.
-        This time period is measured since the last time the scheduler gave this network group run time.
-
+        """Sets the maximum time period that may pass before receiving run time from the scheduler.
+        This will occur provided that at least one send request has been sent; there is no minimum requirement
+        on the number of send requests (e.g. threshold - see set_scheduler_threshold()).
+
         Args:
             timeout_ms (int): Timeout in milliseconds.
         """
@@ -1058,10 +1055,25 @@ def set_nms_max_proposals_per_class(self, max_proposals_per_class):
             max_proposals_per_class (int): NMS max proposals per class to set.

         Note:
+            This function must be called before starting inference!
             This function will fail in cases where there is no output with NMS operations on the CPU.
         """
         return self._infer_pipeline.set_nms_max_proposals_per_class(max_proposals_per_class)

+    def set_nms_max_accumulated_mask_size(self, max_accumulated_mask_size):
+        """Set maximum accumulated mask size for all the detections in a frame.
+        Used in order to change the output buffer frame size,
+        in cases where the output buffer is too small for all the segmentation detections.
+
+        Args:
+            max_accumulated_mask_size (int): NMS max accumulated mask size.
+
+        Note:
+            This function must be called before starting inference!
+            This function will fail in cases where there is no output with NMS operations on the CPU.
+        """
+        return self._infer_pipeline.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)
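Taken together, the new Python surface looks roughly like the sketch below. It is a minimal sketch, not a snippet from the repo: `model.hef`, the input shape and the mask-size budget are made-up placeholders, and the scheduler call assumes a VDevice with the scheduler enabled (the default).

```python
# Minimal sketch: "model.hef", the input shape and the 8 MB mask budget are placeholders.
import numpy as np
from hailo_platform import (VDevice, HEF, ConfigureParams, InferVStreams,
                            InputVStreamParams, OutputVStreamParams, HailoStreamInterface)
from hailo_platform.pyhailort.pyhailort import HailoRTStreamAborted

hef = HEF("model.hef")  # hypothetical segmentation model with a CPU-side NMS output
with VDevice() as vdevice:
    params = ConfigureParams.create_from_hef(hef, interface=HailoStreamInterface.PCIe)
    network_group = vdevice.configure(hef, params)[0]
    network_group.set_scheduler_timeout(100)  # get run time after 100 ms even below the threshold

    with InferVStreams(network_group,
                       InputVStreamParams.make(network_group),
                       OutputVStreamParams.make(network_group)) as infer_pipeline:
        # Must be called before inference starts; fails if no CPU-side NMS output exists.
        infer_pipeline.set_nms_max_accumulated_mask_size(8 * 1024 * 1024)
        input_name = hef.get_input_vstream_infos()[0].name
        frame = np.zeros((1, 224, 224, 3), dtype=np.uint8)  # shape is model-dependent
        try:
            results = infer_pipeline.infer({input_name: frame})
        except HailoRTStreamAborted:
            pass  # HAILO_STREAM_ABORT surfaced from libhailort, e.g. on shutdown
```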
+
     def __exit__(self, *args):
         self._infer_pipeline.release()
         return False
@@ -1487,8 +1499,8 @@ class HailoFormatFlags(_pyhailort.FormatFlags):
 SUPPORTED_PROTOCOL_VERSION = 2
 SUPPORTED_FW_MAJOR = 4
-SUPPORTED_FW_MINOR = 16
-SUPPORTED_FW_REVISION = 2
+SUPPORTED_FW_MINOR = 17
+SUPPORTED_FW_REVISION = 0

 MEGA_MULTIPLIER = 1000.0 * 1000.0
@@ -3120,6 +3132,20 @@ def set_nms_max_proposals_per_class(self, max_proposals_per_class):
         """
         return self._recv_object.set_nms_max_proposals_per_class(max_proposals_per_class)

+    def set_nms_max_accumulated_mask_size(self, max_accumulated_mask_size):
+        """Set maximum accumulated mask size for all the detections in a frame.
+        Used in order to change the output buffer frame size,
+        in cases where the output buffer is too small for all the segmentation detections.
+
+        Args:
+            max_accumulated_mask_size (int): NMS max accumulated mask size.
+
+        Note:
+            This function must be called before starting inference!
+            This function will fail in cases where there is no output with NMS operations on the CPU.
+        """
+        return self._recv_object.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)
+

 class OutputVStreams(object):
     """Output virtual streams pipelines that allows to receive data, to be used as a context manager."""
diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
index b6620c6e..a3590887 100644
--- a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
+++ b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py
@@ -23,7 +23,7 @@ class PlatformCommands:
     'fw-update': ('Firmware update tool', FWUpdaterCLI),
     'ssb-update': ('Second stage boot update tool', SSBUpdaterCLI),
     'fw-config': ('Firmware configuration tool', FWConfigCommandCLI),
-    'udp-rate-limiter': ('Limit UDP rate', UDPRateLimiterCLI),
+    'udp-rate-limiter': ('Limit the UDP rate', UDPRateLimiterCLI),
     'fw-control': ('Useful firmware control operations', ControlCommandCLI),
     'fw-logger': ('Download fw logs to a file', LoggerCommandCLI),
     'scan': ('Scans for devices (Ethernet or PCIE)', ScanCommandCLI),
diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py
index f6b5f5ad..5ba4ce3c 100644
--- a/hailort/libhailort/bindings/python/platform/setup.py
+++ b/hailort/libhailort/bindings/python/platform/setup.py
@@ -69,6 +69,6 @@ def _get_package_paths():
         "linux_aarch64",
     ],
     url="https://hailo.ai/",
-    version="4.16.2",
+    version="4.17.0",
     zip_safe=False,
 )
diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt
index 91c03fb5..d22a0987 100644
--- a/hailort/libhailort/bindings/python/src/CMakeLists.txt
+++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt
@@ -49,7 +49,7 @@ set_target_properties(_pyhailort PROPERTIES
     # VISIBILITY_INLINES_HIDDEN YES
 )

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)
 target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort)

 if(WIN32)
diff --git a/hailort/libhailort/bindings/python/src/bindings_common.hpp b/hailort/libhailort/bindings/python/src/bindings_common.hpp
index d135e52c..2367af4e 100644
---
a/hailort/libhailort/bindings/python/src/bindings_common.hpp +++ b/hailort/libhailort/bindings/python/src/bindings_common.hpp @@ -48,7 +48,7 @@ class HailoRTBindingsCommon case HAILO_FORMAT_ORDER_HAILO_NMS: return { HailoRTCommon::get_nms_host_shape_size(vstream_info.nms_shape) }; case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: { - return { HailoRTCommon::get_nms_with_byte_mask_host_shape_size(vstream_info.nms_shape, user_format) }; + return {HailoRTCommon::get_nms_host_frame_size(vstream_info.nms_shape, user_format) / HailoRTCommon::get_format_data_bytes(user_format)}; } case HAILO_FORMAT_ORDER_NC: return {shape.features}; diff --git a/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt b/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt deleted file mode 100644 index 0e85942c..00000000 --- a/hailort/libhailort/bindings/python/src/internal/CMakeLists.txt +++ /dev/null @@ -1,44 +0,0 @@ -cmake_minimum_required(VERSION 3.15.0) - -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) - -pybind11_add_module(_pyhailort_internal SHARED - pyhailort_internal.cpp - control_api.cpp - $ -) - -add_dependencies(_pyhailort_internal libhailort) - -set_target_properties(_pyhailort_internal PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED YES -) - -target_include_directories(_pyhailort_internal - PRIVATE - $ - $ - $ - $ - $ - $ -) - -target_link_libraries(_pyhailort_internal PRIVATE - hef_proto - spdlog::spdlog - readerwriterqueue - profiler_proto - scheduler_mon_proto) -if(HAILO_BUILD_SERVICE) - target_link_libraries(_pyhailort_internal PRIVATE grpc++_unsecure hailort_rpc_grpc_proto) -endif() - -if(WIN32) - target_link_libraries(_pyhailort_internal PRIVATE Ws2_32 Iphlpapi Shlwapi) -endif() - -target_compile_options(_pyhailort_internal PRIVATE ${HAILORT_COMPILE_OPTIONS}) -exclude_archive_libs_symbols(_pyhailort_internal) diff --git a/hailort/libhailort/bindings/python/src/internal/control_api.cpp b/hailort/libhailort/bindings/python/src/internal/control_api.cpp deleted file mode 100644 index 07d790c7..00000000 --- a/hailort/libhailort/bindings/python/src/internal/control_api.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file control_api.cpp - * @brief Defines binding to control functions - * - **/ - -#include "control_api.hpp" -#include "utils.hpp" -#include "hailo/device.hpp" -#include "common/utils.hpp" - -namespace hailort -{ - -void ControlWrapper::set_clock_freq(DeviceWrapper &device, uint32_t clock_freq) -{ - auto status = Control::set_clock_freq(*device, clock_freq); - VALIDATE_STATUS(status); -} - -void ControlWrapper::close_all_streams(DeviceWrapper &device) -{ - auto status = Control::close_all_streams(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::config_ahb_to_axi(DeviceWrapper &device, bool use_64bit_data_only) -{ - CONTROL_PROTOCOL__config_core_top_type_t config_type = CONTROL_PROTOCOL__CONFIG_CORE_TOP_TYPE_AHB_TO_AXI; - CONTROL_PROTOCOL__config_core_top_params_t params = {0}; - params.ahb_to_axi.enable_use_64bit_data_only = use_64bit_data_only; - - auto status = Control::config_core_top(*device, config_type, ¶ms); - VALIDATE_STATUS(status); -} - -void ControlWrapper::phy_operation(DeviceWrapper &device, CONTROL_PROTOCOL__phy_operation_t operation_type) -{ - auto status = Control::phy_operation(*device, operation_type); - VALIDATE_STATUS(status); -} - -uint32_t ControlWrapper::latency_measurement_read(DeviceWrapper &device) -{ - uint32_t inbound_to_outbound_latency_nsec = 0; - - auto status = Control::latency_measurement_read(*device, &inbound_to_outbound_latency_nsec); - VALIDATE_STATUS(status); - - return inbound_to_outbound_latency_nsec; -} - -void ControlWrapper::latency_measurement_config(DeviceWrapper &device, uint8_t latency_measurement_en, - uint32_t inbound_start_buffer_number, uint32_t outbound_stop_buffer_number, uint32_t inbound_stream_index, - uint32_t outbound_stream_index) -{ - auto status = Control::latency_measurement_config(*device, latency_measurement_en, inbound_start_buffer_number, - outbound_stop_buffer_number, inbound_stream_index, outbound_stream_index); - VALIDATE_STATUS(status); -} - -void ControlWrapper::start_firmware_update(DeviceWrapper &device) -{ - auto status = Control::start_firmware_update(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::finish_firmware_update(DeviceWrapper &device) -{ - auto status = Control::finish_firmware_update(*device); - VALIDATE_STATUS(status); -} - -void ControlWrapper::write_firmware_update(DeviceWrapper &device, uint32_t offset, py::bytes data, uint32_t length) -{ - auto status = Control::write_firmware_update(*device, offset, (uint8_t*)std::string(data).c_str(), length); - VALIDATE_STATUS(status); -} - -void ControlWrapper::validate_firmware_update(DeviceWrapper &device, py::bytes md5_raw_data, uint32_t firmware_size) -{ - MD5_SUM_t expected_md5 = {0}; - memcpy(&expected_md5, (uint8_t*)std::string(md5_raw_data).c_str(), sizeof(expected_md5)); - - auto status = Control::validate_firmware_update(*device, &expected_md5, firmware_size); - VALIDATE_STATUS(status); -} - -py::bytes ControlWrapper::sensor_get_config(DeviceWrapper &device, uint32_t section_index, uint32_t offset, uint32_t data_length) -{ - std::unique_ptr response = make_unique_nothrow(data_length, '\x00'); - VALIDATE_NOT_NULL(response, HAILO_OUT_OF_HOST_MEMORY); - - auto status = Control::sensor_get_config(*device, section_index, offset, data_length, (uint8_t*)(response->data())); - VALIDATE_STATUS(status); - - return *response; -} - -void ControlWrapper::idle_time_set_measurement(DeviceWrapper &device, bool measurement_enable) -{ - auto status = 
Control::idle_time_set_measurement(*device, measurement_enable); - VALIDATE_STATUS(status); -} - -uint64_t ControlWrapper::idle_time_get_measurement(DeviceWrapper &device) -{ - uint64_t measurement = 0; - - auto status = Control::idle_time_get_measurement(*device, &measurement); - VALIDATE_STATUS(status); - - return measurement; -} - -void ControlWrapper::d2h_notification_manager_set_host_info(DeviceWrapper &device, uint16_t host_port, uint32_t host_ip_address) -{ - auto status = Control::d2h_notification_manager_set_host_info(*device, host_port, host_ip_address); - VALIDATE_STATUS(status); -} - -void ControlWrapper::d2h_notification_manager_send_host_info_notification(DeviceWrapper &device, uint8_t notification_priority) -{ - auto status = Control::d2h_notification_manager_send_host_info_notification(*device, notification_priority); - VALIDATE_STATUS(status); -} - -/* Context switch */ -void ControlWrapper::set_context_switch_breakpoint(DeviceWrapper &device, - uint8_t breakpoint_id, - bool break_at_any_network_group_index, uint8_t network_group_index, - bool break_at_any_batch_index, uint16_t batch_index, - bool break_at_any_context_index,uint8_t context_index, - bool break_at_any_action_index, uint16_t action_index) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_SET; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = { - break_at_any_network_group_index, - network_group_index, - break_at_any_batch_index, - batch_index, - break_at_any_context_index, - context_index, - break_at_any_action_index, - action_index}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -void ControlWrapper::continue_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_CONTINUE; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = {false,0,false,0,false,0,false,0}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, - breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -void ControlWrapper::clear_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_breakpoint_control_t breakpoint_control = - CONTROL_PROTOCOL__CONTEXT_SWITCH_BREAKPOINT_CONTROL_CLEAR; - CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = {false,0,false,0,false,0,false,0}; - - auto status = Control::config_context_switch_breakpoint(*device, breakpoint_id, - breakpoint_control, &breakpoint_data); - VALIDATE_STATUS(status); -} - -uint8_t ControlWrapper::get_context_switch_breakpoint_status(DeviceWrapper &device, uint8_t breakpoint_id) -{ - CONTROL_PROTOCOL__context_switch_debug_sys_status_t breakpoint_status = - CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_COUNT; - - auto status = Control::get_context_switch_breakpoint_status(*device, breakpoint_id, - &breakpoint_status); - VALIDATE_STATUS(status); - - return static_cast(breakpoint_status); -} - -void ControlWrapper::config_context_switch_timestamp(DeviceWrapper &device, uint16_t batch_index) -{ - auto status = Control::config_context_switch_timestamp(*device, batch_index, true); - VALIDATE_STATUS(status); -} - -void ControlWrapper::remove_context_switch_timestamp_configuration(DeviceWrapper 
&device) -{ - auto status = Control::config_context_switch_timestamp(*device, 0, false); - VALIDATE_STATUS(status); -} - -void ControlWrapper::enable_debugging(DeviceWrapper &device, bool is_rma) -{ - auto status = Control::enable_debugging(*device, is_rma); - VALIDATE_STATUS(status); -} - -void ControlWrapper::add_to_python_module(py::module &m) -{ - m.def("_set_clock_freq", &ControlWrapper::set_clock_freq); - m.def("close_all_streams", &ControlWrapper::close_all_streams); - m.def("config_ahb_to_axi", &ControlWrapper::config_ahb_to_axi); - m.def("phy_operation", &ControlWrapper::phy_operation); - m.def("latency_measurement_read", &ControlWrapper::latency_measurement_read); - m.def("latency_measurement_config", &ControlWrapper::latency_measurement_config); - m.def("start_firmware_update", &ControlWrapper::start_firmware_update); - m.def("finish_firmware_update", &ControlWrapper::finish_firmware_update); - m.def("write_firmware_update", &ControlWrapper::write_firmware_update); - m.def("validate_firmware_update", &ControlWrapper::validate_firmware_update); - m.def("sensor_get_config", &ControlWrapper::sensor_get_config); - m.def("idle_time_set_measurement", &ControlWrapper::idle_time_set_measurement); - m.def("idle_time_get_measurement", &ControlWrapper::idle_time_get_measurement); - m.def("d2h_notification_manager_set_host_info", &ControlWrapper::d2h_notification_manager_set_host_info); - m.def("d2h_notification_manager_send_host_info_notification", &ControlWrapper::d2h_notification_manager_send_host_info_notification); - m.def("set_context_switch_breakpoint", &set_context_switch_breakpoint); - m.def("continue_context_switch_breakpoint", &continue_context_switch_breakpoint); - m.def("clear_context_switch_breakpoint", &clear_context_switch_breakpoint); - m.def("get_context_switch_breakpoint_status", &get_context_switch_breakpoint_status); - m.def("config_context_switch_timestamp", &config_context_switch_timestamp); - m.def("remove_context_switch_timestamp_configuration", &remove_context_switch_timestamp_configuration); - m.def("enable_debugging", &enable_debugging); - - // TODO: HRT-5764 - Remove 'py::module_local()' when removing _pyhailort_internal from external - // py::module_local() is needed because these enums are currently in both _pyhailort and _pyhailort_internal, - // and when trying to import one of them on the python side you will get the error: - // ImportError: generic_type: type "enum_name" is already registered! - // py::module_local() tells pybind11 to keep the external class/enum binding localized to the module. 
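For reference, the Python-side failure that the removed comment describes looks like this (illustrative only; `_pyhailort_internal` is the extension module deleted by this change):

```python
# Illustrative only: because the duplicated enums were bound with py::module_local(),
# both extension modules could be imported side by side; without it, the second import
# raised "ImportError: generic_type: type '...' is already registered!".
import _pyhailort
import _pyhailort_internal
```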
- py::enum_(m, "ContextSwitchBreakpointStatus", py::module_local()) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_CLEARED",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_CLEARED) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_WAITING_FOR_BREAKPOINT",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_WAITING_FOR_BREAKPOINT) - .value("CONTEXT_SWITCH_BREAKPOINT_STATUS_REACHED_BREAKPOINT",CONTROL_PROTOCOL__CONTEXT_SWITCH_DEBUG_SYS_STATUS_REACHED_BREAKPOINT) - ; - - py::enum_(m, "CONTROL_PROTOCOL__phy_operation_t", py::module_local()) - .value("PHY_OPERATION_RESET", CONTROL_PROTOCOL__PHY_OPERATION_RESET) - ; - - py::enum_(m, "CONTROL_PROTOCOL__mipi_deskew_enable_t", py::module_local()) - .value("MIPI__DESKEW_FORCE_DISABLE", CONTROL_PROTOCOL__MIPI_DESKEW__FORCE_DISABLE) - .value("MIPI__DESKEW_FORCE_ENABLE", CONTROL_PROTOCOL__MIPI_DESKEW__FORCE_ENABLE) - .value("MIPI__DESKEW_DEFAULT", CONTROL_PROTOCOL__MIPI_DESKEW__DEFAULT) - ; - -} - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/internal/control_api.hpp b/hailort/libhailort/bindings/python/src/internal/control_api.hpp deleted file mode 100644 index ffcf26de..00000000 --- a/hailort/libhailort/bindings/python/src/internal/control_api.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file control_api.hpp - * @brief Defines binding to control functions - * - **/ - -#ifndef _CONTROL_API_HPP_ -#define _CONTROL_API_HPP_ - -#include "device_common/control.hpp" -#include "utils.hpp" - -#include "device_api.hpp" - -#include -#include -#include -#include -#include -#include -#include - -namespace hailort -{ - -class ControlWrapper { -public: - static void add_to_python_module(py::module &m); - - static void set_clock_freq(DeviceWrapper &device, uint32_t clock_freq); - static void close_all_streams(DeviceWrapper &device); - static void config_ahb_to_axi(DeviceWrapper &device, bool use_64bit_data_only); - static void phy_operation(DeviceWrapper &device, CONTROL_PROTOCOL__phy_operation_t operation_type); - static uint32_t latency_measurement_read(DeviceWrapper &device); - static void latency_measurement_config(DeviceWrapper &device, uint8_t latency_measurement_en, - uint32_t inbound_start_buffer_number, uint32_t outbound_stop_buffer_number, uint32_t inbound_stream_index, - uint32_t outbound_stream_index); - static void start_firmware_update(DeviceWrapper &device); - static void finish_firmware_update(DeviceWrapper &device); - static void write_firmware_update(DeviceWrapper &device, uint32_t offset, py::bytes data, uint32_t length); - static void validate_firmware_update(DeviceWrapper &device, py::bytes md5_raw_data, uint32_t firmware_size); - static py::bytes sensor_get_config(DeviceWrapper &device, uint32_t section_index, uint32_t offset, uint32_t data_length); - static void idle_time_set_measurement(DeviceWrapper &device, bool measurement_enable); - static uint64_t idle_time_get_measurement(DeviceWrapper &device); - static void d2h_notification_manager_set_host_info(DeviceWrapper &device, uint16_t host_port, uint32_t host_ip_address); - static void d2h_notification_manager_send_host_info_notification(DeviceWrapper &device, uint8_t notification_priority); - static void enable_debugging(DeviceWrapper &device, bool is_rma); - - /* Context switch */ - static void set_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id, - bool 
break_at_any_network_group_index, uint8_t network_group_index, - bool break_at_any_batch_index, uint16_t batch_index, - bool break_at_any_context_index,uint8_t context_index, - bool break_at_any_action_index, uint16_t action_index); - static void continue_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id); - static void clear_context_switch_breakpoint(DeviceWrapper &device, uint8_t breakpoint_id); - static uint8_t get_context_switch_breakpoint_status(DeviceWrapper &device, uint8_t breakpoint_id); - static void config_context_switch_timestamp(DeviceWrapper &device, uint16_t batch_index); - static void remove_context_switch_timestamp_configuration(DeviceWrapper &device); -}; - -} /* namespace hailort */ - -#endif /* _CONTROL_API_HPP_ */ diff --git a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp deleted file mode 100644 index c9d8e728..00000000 --- a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.cpp +++ /dev/null @@ -1,405 +0,0 @@ - - -#include "hailo/hailort.h" - -#include "transform/transform_internal.hpp" -#include "bindings_common.hpp" - -#include "pyhailort_internal.hpp" -#include "control_api.hpp" -#include "utils.hpp" -#include "utils.h" - -#include -#include -#include -#include -#include -#include -#include - - -namespace hailort -{ -// TODO: Remove (HRT-9944) -// Duplicated for hailo post process test with python API. -static const uint32_t TEST_NUM_OF_CLASSES = 80; - - -Expected get_expected_buffer_float32() -{ - static const uint32_t DETECTION_CLASS_ID_1 = 0; - static const float32_t CLASS_ID_1_DETECTION_COUNT = 5; - static const uint32_t DETECTION_CLASS_ID_3 = 2; - static const float32_t CLASS_ID_3_DETECTION_COUNT = 2; - static const uint32_t DETECTION_CLASS_ID_8 = 7; - static const float32_t CLASS_ID_8_DETECTION_COUNT = 1; - static const uint32_t DETECTION_CLASS_ID_26 = 25; - static const float32_t CLASS_ID_26_DETECTION_COUNT = 1; - - static const hailo_bbox_float32_t bbox1_0 = { - /*.y_min =*/ 0.5427529811859131f, - /*.x_min =*/ 0.2485126256942749f, - /*.y_max =*/ 0.6096446067f, - /*.x_max =*/ 0.27035075984f, - /*.score =*/ 0.7761699557304382f, - }; - - static const hailo_bbox_float32_t bbox1_1 = { - /*.y_min =*/ 0.5454554557800293f, - /*.x_min =*/ 0.33257606625556948f, - /*.y_max =*/ 0.7027952075f, - /*.x_max =*/ 0.40901548415f, - /*.score =*/ 0.7637669444084168f, - }; - - static const hailo_bbox_float32_t bbox1_2 = { - /*.y_min =*/ 0.5521867275238037f, - /*.x_min =*/ 0.19988654553890229f, - /*.y_max =*/ 0.60256312787f, - /*.x_max =*/ 0.21917282976f, - /*.score =*/ 0.7451231479644775f, - }; - - static const hailo_bbox_float32_t bbox1_3 = { - /*.y_min =*/ 0.5514537692070007f, - /*.x_min =*/ 0.2693796157836914f, - /*.y_max =*/ 0.60397491604f, - /*.x_max =*/ 0.28537025302f, - /*.score =*/ 0.3756354749202728f, - }; - - static const hailo_bbox_float32_t bbox1_4 = { - /*.y_min =*/ 0.553998589515686f, - /*.x_min =*/ 0.18612079322338105f, - /*.y_max =*/ 0.58339602686f, - /*.x_max =*/ 0.2008818537f, - /*.score =*/ 0.3166312277317047f, - }; - - static const hailo_bbox_float32_t bbox3_0 = { - /*.y_min =*/ 0.5026738047599793f, - /*.x_min =*/ -0.005611047148704529f, - /*.y_max =*/ 0.65071095526f, - /*.x_max =*/ 0.13888412714f, - /*.score =*/ 0.5734351277351379f, - }; - - static const hailo_bbox_float32_t bbox3_1 = { - /*.y_min =*/ 0.5620155334472656f, - /*.x_min =*/ 0.16757474839687348f, - /*.y_max =*/ 0.58410947769f, - /*.x_max =*/ 0.19325175508f, - 
/*.score =*/ 0.4062519371509552f, - }; - - static const hailo_bbox_float32_t bbox8_0 = { - /*.y_min =*/ 0.5028372406959534f, - /*.x_min =*/ -0.0017736181616783143f, - /*.y_max =*/ 0.65114967525f, - /*.x_max =*/ 0.13592261821f, - /*.score =*/ 0.4223918318748474f, - }; - - static const hailo_bbox_float32_t bbox26_0 = { - /*.y_min =*/ 0.5854946374893189f, - /*.x_min =*/ 0.2693060040473938f, - /*.y_max =*/ 0.68259389698f, - /*.x_max =*/ 0.38090330362f, - /*.score =*/ 0.6338639259338379f, - }; - - static const uint32_t DETECTION_COUNT = 9; - auto buffer_size = (DETECTION_COUNT * sizeof(hailo_bbox_float32_t)) + (TEST_NUM_OF_CLASSES * sizeof(float32_t)); - auto buffer_expected = Buffer::create(buffer_size, 0); - CHECK_EXPECTED(buffer_expected); - auto buffer = buffer_expected.release(); - - size_t offset = 0; - for (uint32_t class_index = 0; class_index < TEST_NUM_OF_CLASSES; class_index++) { - if (DETECTION_CLASS_ID_1 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_1_DETECTION_COUNT, sizeof(CLASS_ID_1_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_1_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox1_0, sizeof(bbox1_0)); - offset += sizeof(bbox1_0); - - memcpy(buffer.data() + offset, &bbox1_1, sizeof(bbox1_1)); - offset += sizeof(bbox1_1); - - memcpy(buffer.data() + offset, &bbox1_2, sizeof(bbox1_2)); - offset += sizeof(bbox1_2); - - memcpy(buffer.data() + offset, &bbox1_3, sizeof(bbox1_3)); - offset += sizeof(bbox1_3); - - memcpy(buffer.data() + offset, &bbox1_4, sizeof(bbox1_4)); - offset += sizeof(bbox1_4); - } - else if (DETECTION_CLASS_ID_3 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_3_DETECTION_COUNT, sizeof(CLASS_ID_3_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_3_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox3_0, sizeof(bbox3_0)); - offset += sizeof(bbox3_0); - - memcpy(buffer.data() + offset, &bbox3_1, sizeof(bbox3_1)); - offset += sizeof(bbox3_1); - } - else if (DETECTION_CLASS_ID_8 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_8_DETECTION_COUNT, sizeof(CLASS_ID_8_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_8_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox8_0, sizeof(bbox8_0)); - offset += sizeof(bbox8_0); - } - else if (DETECTION_CLASS_ID_26 == class_index) { - memcpy(buffer.data() + offset, &CLASS_ID_26_DETECTION_COUNT, sizeof(CLASS_ID_26_DETECTION_COUNT)); - offset += sizeof(CLASS_ID_26_DETECTION_COUNT); - - memcpy(buffer.data() + offset, &bbox26_0, sizeof(bbox26_0)); - offset += sizeof(bbox26_0); - } - else { - offset += sizeof(float32_t); - } - } - - return buffer; -} - -py::array PyhailortInternal::get_yolov5_post_process_expected_buffer() -{ - auto buffer = get_expected_buffer_float32(); - VALIDATE_EXPECTED(buffer); - - auto type = py::dtype(HailoRTBindingsCommon::convert_format_type_to_string(HAILO_FORMAT_TYPE_FLOAT32)); - auto shape = *py::array::ShapeContainer({buffer->size()}); - - // Note: The ownership of the buffer is transferred to Python wrapped as a py::array. 
- // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor - // is called too (and it deletes the raw buffer) - auto unmanaged_addr_exp = buffer->storage().release(); - VALIDATE_EXPECTED(unmanaged_addr_exp); - const auto unmanaged_addr = unmanaged_addr_exp.release(); - return py::array(type, shape, unmanaged_addr, - py::capsule(unmanaged_addr, [](void *p) { delete reinterpret_cast(p); })); -} - -void PyhailortInternal::demux_output_buffer( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - std::map dst_buffers, const LayerInfo &mux_layer_info) -{ - const size_t hw_frame_size = HailoRTCommon::get_frame_size(src_shape, src_format); - auto expected_output_demuxer = OutputDemuxerBase::create(hw_frame_size, mux_layer_info); - VALIDATE_EXPECTED(expected_output_demuxer); - - auto demuxer = expected_output_demuxer.release(); - - std::map dst_ptrs; - for (auto &dst_buffer_pair : dst_buffers) { - dst_ptrs.insert(std::make_pair(dst_buffer_pair.first, - MemoryView(reinterpret_cast(dst_buffer_pair.second.mutable_data()), - dst_buffer_pair.second.nbytes()))); - } - - const auto src_str = static_cast(src); - auto status = demuxer.transform_demux( - MemoryView(const_cast(reinterpret_cast(src_str.c_str())), src_str.length()), dst_ptrs); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_input_buffer( - py::array src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - uintptr_t dst, size_t dst_size, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos) -{ - auto transform_context = InputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos); - VALIDATE_EXPECTED(transform_context); - - MemoryView dst_buffer(reinterpret_cast(dst), dst_size); - auto status = transform_context.value()->transform( - MemoryView::create_const(const_cast(reinterpret_cast(src.data())), src.nbytes()), - dst_buffer); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_output_buffer( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - py::array dst, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos) -{ - auto transform_context = OutputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos, {}); - VALIDATE_EXPECTED(transform_context); - - const auto src_str = static_cast(src); - MemoryView dst_buffer(reinterpret_cast(dst.mutable_data()), dst.nbytes()); - auto status = transform_context.value()->transform(MemoryView::create_const(src_str.c_str(), - src_str.length()), dst_buffer); - VALIDATE_STATUS(status); -} - -void PyhailortInternal::transform_output_buffer_nms( - py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - py::array dst, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info) -{ - auto transform_context = OutputTransformContext::create(src_shape, src_format, dst_shape, dst_format, - dst_quant_infos, nms_info); - VALIDATE_EXPECTED(transform_context); - - const auto src_str = static_cast(src); - MemoryView dst_buffer(reinterpret_cast(dst.mutable_data()), dst.nbytes()); - auto status = transform_context.value()->transform(MemoryView::create_const(src_str.c_str(), - src_str.size()), dst_buffer); - VALIDATE_STATUS(status); -} - -bool 
PyhailortInternal::is_input_transformation_required( - const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, - const std::vector &quant_infos) -{ - auto expected_is_transforamtion_required = InputTransformContext::is_transformation_required(src_shape, src_format, dst_shape, dst_format, - quant_infos); - VALIDATE_EXPECTED(expected_is_transforamtion_required); - - return expected_is_transforamtion_required.release(); -} - -bool PyhailortInternal::is_output_transformation_required( - const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, - const std::vector &quant_infos) -{ - auto expected_is_transforamtion_required = OutputTransformContext::is_transformation_required(src_shape, src_format, dst_shape, dst_format, - quant_infos); - VALIDATE_EXPECTED(expected_is_transforamtion_required); - - return expected_is_transforamtion_required.release(); -} - -py::list PyhailortInternal::get_all_layers_info(const HefWrapper &hef, const std::string &net_group_name) -{ - auto core_op_metadata = hef.hef_ptr()->pimpl->get_core_op_metadata(net_group_name); - VALIDATE_EXPECTED(core_op_metadata); - - return py::cast(core_op_metadata.value()->get_all_layer_infos()); -} - -PYBIND11_MODULE(_pyhailort_internal, m) { - ControlWrapper::add_to_python_module(m); - m.def("get_yolov5_post_process_expected_buffer", &PyhailortInternal::get_yolov5_post_process_expected_buffer); - m.def("demux_output_buffer", &PyhailortInternal::demux_output_buffer); - m.def("transform_input_buffer", &PyhailortInternal::transform_input_buffer); - m.def("transform_output_buffer", &PyhailortInternal::transform_output_buffer); - m.def("transform_output_buffer_nms", &PyhailortInternal::transform_output_buffer_nms); - m.def("is_input_transformation_required", &PyhailortInternal::is_input_transformation_required); - m.def("is_output_transformation_required", &PyhailortInternal::is_output_transformation_required); - m.def("get_all_layers_info", &PyhailortInternal::get_all_layers_info); - - py::class_(m, "BufferIndices", py::module_local()) - .def_readonly("index", &BufferIndices::index) - .def_readonly("cluster_index", &BufferIndices::cluster_index) - ; - - py::enum_(m, "SensorConfigOpCode") - .value("SENSOR_CONFIG_OPCODES_WR", SENSOR_CONFIG_OPCODES_WR) - .value("SENSOR_CONFIG_OPCODES_RD", SENSOR_CONFIG_OPCODES_RD) - .value("SENSOR_CONFIG_OPCODES_RMW", SENSOR_CONFIG_OPCODES_RMW) - .value("SENSOR_CONFIG_OPCODES_DELAY", SENSOR_CONFIG_OPCODES_DELAY) - ; - - py::class_(m, "HailoLayerInfo", py::module_local()) - .def_readonly("is_mux", &LayerInfo::is_mux) - .def_readonly("mux_predecessors", &LayerInfo::predecessor) - .def_readonly("is_multi_planar", &LayerInfo::is_multi_planar) - .def_readonly("planes", &LayerInfo::planes) - .def_readonly("plane_index", &LayerInfo::plane_index) - .def_readonly("is_defused_nms", &LayerInfo::is_defused_nms) - .def_readonly("fused_nms_layer", &LayerInfo::fused_nms_layer) - .def_property_readonly("shape", [](LayerInfo& self) - { - switch (self.format.order) { - case HAILO_FORMAT_ORDER_NC: - return py::make_tuple(self.shape.features); - case HAILO_FORMAT_ORDER_NHW: - return py::make_tuple(self.shape.height, self.shape.width); - default: - return py::make_tuple(self.shape.height, self.shape.width, self.shape.features); - } - }) - .def_property_readonly("height", [](LayerInfo& self) - { - return self.shape.height; - }) - 
.def_property_readonly("width", [](LayerInfo& self) - { - return self.shape.width; - }) - .def_property_readonly("features", [](LayerInfo& self) - { - return self.shape.features; - }) - .def("hw_shape", [](LayerInfo& self) - { - return py::make_tuple(self.hw_shape.height, self.hw_shape.width, self.hw_shape.features); - }) - .def_property_readonly("padded_height", [](LayerInfo& self) - { - return self.hw_shape.height; - }) - .def_property_readonly("padded_width", [](LayerInfo& self) - { - return self.hw_shape.width; - }) - .def_property_readonly("padded_features", [](LayerInfo& self) - { - return self.hw_shape.features; - }) - .def_readonly("data_bytes", &LayerInfo::hw_data_bytes) - .def_readonly("format", &LayerInfo::format) - .def_property_readonly("format_order", [](LayerInfo& self) - { - return self.format.order; - }) - .def_readonly("direction", &LayerInfo::direction) - .def_readonly("sys_index", &LayerInfo::stream_index) - .def_readonly("name", &LayerInfo::name) - .def_readonly("quant_infos", &LayerInfo::quant_infos) - // For backwards compatibility (accessing qp through layer_info directly) - .def_property_readonly("qp_zp", [](LayerInfo& self) - { - return self.quant_info.qp_zp; - }) - .def_property_readonly("qp_scale", [](LayerInfo& self) - { - return self.quant_info.qp_scale; - }) - .def_property_readonly("limvals_min", [](LayerInfo& self) - { - return self.quant_info.limvals_min; - }) - .def_property_readonly("limvals_max", [](LayerInfo& self) - { - return self.quant_info.limvals_max; - }) - .def_readonly("nms_info", &LayerInfo::nms_info) - .def_readonly("height_gcd", &LayerInfo::height_gcd) - .def_readonly("height_ratios", &LayerInfo::height_ratios) - .def_readonly("buffer_indices", &LayerInfo::buffer_indices) - .def_property_readonly("core_bytes_per_buffer", [](LayerInfo& self) - { - return self.nn_stream_config.core_bytes_per_buffer; - }) - .def_property_readonly("core_buffers_per_frame", [](LayerInfo& self) - { - return self.nn_stream_config.core_buffers_per_frame; - }) - .def_readonly("network_name", &LayerInfo::network_name) - ; -} - -} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp b/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp deleted file mode 100644 index bb6f0de7..00000000 --- a/hailort/libhailort/bindings/python/src/internal/pyhailort_internal.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file pyhailort_internal.hpp - * @brief Defines binding of internal functions over Python. 
- **/ - -#ifndef _PYHAILORT_INTERNAL_ -#define _PYHAILORT_INTERNAL_ - -#include "hef/hef_internal.hpp" - -#include "hef_api.hpp" -#include "utils.hpp" -#include "utils.h" -#include -#include -#include -#include -#include -#include -#include - - -namespace hailort -{ - -class PyhailortInternal { -public: - static py::array get_yolov5_post_process_expected_buffer(); - static void demux_output_buffer(py::bytes src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - std::map dst_buffers, const LayerInfo &mux_layer_info); - static void transform_input_buffer(py::array src, const hailo_format_t &src_format, const hailo_3d_image_shape_t &src_shape, - uintptr_t dst, size_t dst_size, const hailo_format_t &dst_format, const hailo_3d_image_shape_t &dst_shape, - const std::vector &dst_quant_infos); - static void transform_output_buffer(py::bytes src, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &src_shape, py::array dst, const hailo_format_t &dst_format, - const hailo_3d_image_shape_t &dst_shape, const std::vector &dst_quant_infos); - static void transform_output_buffer_nms(py::bytes src, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &src_shape, py::array dst, const hailo_format_t &dst_format, - const hailo_3d_image_shape_t &dst_shape, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info); - static bool is_input_transformation_required(const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, const std::vector &quant_infos); - static bool is_output_transformation_required(const hailo_3d_image_shape_t &src_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_shape, const hailo_format_t &dst_format, const std::vector &quant_infos); - static py::list get_all_layers_info(const HefWrapper &hef, const std::string &net_group_name); -}; - -} /* namespace hailort */ - -#endif /* _PYHAILORT_INTERNAL_ */ \ No newline at end of file diff --git a/hailort/libhailort/bindings/python/src/network_group_api.cpp b/hailort/libhailort/bindings/python/src/network_group_api.cpp index 925f5f2c..db13dd46 100644 --- a/hailort/libhailort/bindings/python/src/network_group_api.cpp +++ b/hailort/libhailort/bindings/python/src/network_group_api.cpp @@ -71,7 +71,7 @@ void ActivatedAppContextManagerWrapper::add_to_python_module(py::module &m) ; py::class_(m, "ActivatedNetworkGroup") - .def("get_intermediate_buffer", [](ActivatedNetworkGroup& self, uint8_t src_context_index, + .def("get_intermediate_buffer", [](ActivatedNetworkGroup& self, uint16_t src_context_index, uint8_t src_stream_index) { auto buff = self.get_intermediate_buffer(std::make_pair(src_context_index, src_stream_index)); diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp index 5d9d4670..5b0a8c54 100644 --- a/hailort/libhailort/bindings/python/src/pyhailort.cpp +++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp @@ -501,19 +501,19 @@ PYBIND11_MODULE(_pyhailort, m) { .def(py::init<>()) .def_readonly("number_of_classes", &hailo_nms_shape_t::number_of_classes) .def_readonly("max_bboxes_per_class", &hailo_nms_shape_t::max_bboxes_per_class) - .def_readonly("max_mask_size", &hailo_nms_shape_t::max_mask_size) + .def_readonly("max_accumulated_mask_size", &hailo_nms_shape_t::max_accumulated_mask_size) .def(py::pickle( [](const hailo_nms_shape_t &nms_shape) { // __getstate__ return py::make_tuple( 
nms_shape.number_of_classes,
                 nms_shape.max_bboxes_per_class,
-                nms_shape.max_mask_size);
+                nms_shape.max_accumulated_mask_size);
         },
         [](py::tuple t) { // __setstate__
             hailo_nms_shape_t nms_shape;
             nms_shape.number_of_classes = t[0].cast<uint32_t>();
             nms_shape.max_bboxes_per_class = t[1].cast<uint32_t>();
-            nms_shape.max_mask_size = t[2].cast<uint32_t>();
+            nms_shape.max_accumulated_mask_size = t[2].cast<uint32_t>();
             return nms_shape;
         }
     ))
diff --git a/hailort/libhailort/bindings/python/src/vstream_api.cpp b/hailort/libhailort/bindings/python/src/vstream_api.cpp
index 1bf05780..82dbb54a 100644
--- a/hailort/libhailort/bindings/python/src/vstream_api.cpp
+++ b/hailort/libhailort/bindings/python/src/vstream_api.cpp
@@ -155,7 +155,7 @@ void OutputVStreamWrapper::add_to_python_module(py::module &m)
         // Note: The ownership of the buffer is transferred to Python wrapped as a py::array.
         // When the py::array isn't referenced anymore in Python and is destructed, the py::capsule's dtor
         // is called too (and it deletes the raw buffer)
-        auto unmanaged_addr_exp = buffer->storage().release();
+        auto unmanaged_addr_exp = buffer->release();
         VALIDATE_EXPECTED(unmanaged_addr_exp);
         const auto unmanaged_addr = unmanaged_addr_exp.release();
         return py::array(get_dtype(self), get_shape(self), unmanaged_addr,
@@ -176,6 +176,11 @@
         hailo_status status = self.set_nms_max_proposals_per_class(max_proposals_per_class);
         VALIDATE_STATUS(status);
     })
+    .def("set_nms_max_accumulated_mask_size", [](OutputVStream &self, uint32_t max_accumulated_mask_size)
+    {
+        hailo_status status = self.set_nms_max_accumulated_mask_size(max_accumulated_mask_size);
+        VALIDATE_STATUS(status);
+    })
     .def_property_readonly("info", [](OutputVStream &self)
     {
         return self.get_info();
@@ -403,6 +408,10 @@ void InferVStreamsWrapper::add_to_python_module(py::module &m)
     {
         VALIDATE_STATUS(self.m_infer_pipeline->set_nms_max_proposals_per_class(max_proposals_per_class));
     })
+    .def("set_nms_max_accumulated_mask_size", [](InferVStreamsWrapper &self, uint32_t max_accumulated_mask_size)
+    {
+        VALIDATE_STATUS(self.m_infer_pipeline->set_nms_max_accumulated_mask_size(max_accumulated_mask_size));
+    })
     ;
 }
diff --git a/hailort/libhailort/examples/README.md b/hailort/libhailort/examples/README.md
index 47cc9a44..b4b19f11 100644
--- a/hailort/libhailort/examples/README.md
+++ b/hailort/libhailort/examples/README.md
@@ -57,8 +57,8 @@ The following examples are provided, demonstrating the HailoRT API:
     - For Windows, in case of restricted execution policy, either change the policy, or run the script with "PowerShell -NoProfile -ExecutionPolicy Bypass -File <path-to-script>"
   - `notification_callback_example` - Demonstrates how to work with notification callbacks, same as `notification_callback_example` C example. You can find more details about each example in the HailoRT user guide.
-  - `async_infer_example` - Basic asynchronous inference of a shortcut network, uses HailoRT C++ api.
-  - `async_infer_functionality_example` - More advanced asynchronous inference of a multiple input and output model, uses HailoRT C++ api.
+  - `async_infer_basic_example` - Basic asynchronous inference of a model with multiple inputs and outputs, using the HailoRT C++ API.
+  - `async_infer_advanced_example` - More advanced asynchronous inference of a multi-planar model, using the HailoRT C++ API.
 ## Compiling with CMake
 Examples are configured and compiled using the following commands:
 ```sh
@@ -87,3 +87,11 @@
 To run an example, use (from this examples directory):
 ```sh
 build/<c/cpp>/<example_name>/<example_name> [params..]
```
+
+## Hailo Application Code Examples
+
+The examples on this page demonstrate HailoRT API usage.
+
+Hailo also offers an additional set of
+[Application Code Examples](https://github.com/hailo-ai/Hailo-Application-Code-Examples),
+which are more application-oriented.
\ No newline at end of file
diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
index 6a2e675b..ded5ebce 100644
--- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
index 1fd6b7aa..acc59088 100644
--- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
index 5ed386f1..f191cc8f 100644
--- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
index bea6c1cc..d894e5bc 100644
--- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 find_package(Threads REQUIRED)
 set(THREADS_PREFER_PTHREAD_FLAG ON)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C)
diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
index fe145e20..f659f135 100644
--- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
+++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

-find_package(HailoRT 4.16.2 EXACT REQUIRED)
+find_package(HailoRT 4.17.0 EXACT REQUIRED)

 SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C)
diff --git
a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt index 76d85fd1..16a7faa3 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt index 8477fc8a..305c47a6 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c index 1fa7838c..d4ce56e7 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c @@ -50,11 +50,11 @@ static void output_done_callback(const hailo_stream_read_async_completion_info_t // Real applications can forward the buffer to post-process/display. Here we just re-launch new async reads. status = hailo_stream_read_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, output_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { fprintf(stderr, "Failed read async with status=%d\n", status); } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -73,11 +73,11 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t // new async writes. status = hailo_stream_write_raw_buffer_async(stream, completion_info->buffer_addr, completion_info->buffer_size, input_done_callback, stream); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { fprintf(stderr, "Failed write async with status=%d\n", status); } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. 
break; default: @@ -85,7 +85,13 @@ static void input_done_callback(const hailo_stream_write_async_completion_info_t } } -static hailo_status infer(hailo_configured_network_group network_group, size_t number_input_streams, +typedef struct { + void *addr; + size_t size; + hailo_dma_buffer_direction_t direction; +} allocated_buffer_t; + +static hailo_status infer(hailo_device device, hailo_configured_network_group network_group, size_t number_input_streams, hailo_input_stream *input_streams, size_t number_output_streams, hailo_output_stream *output_streams, size_t ongoing_transfers) { @@ -95,7 +101,8 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n size_t frame_size = 0; size_t stream_index = 0; void *current_buffer = NULL; - void *buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; + + allocated_buffer_t buffers[MAX_EDGE_LAYERS * MAX_ONGOING_TRANSFERS] = {0}; size_t allocated_buffers = 0; // We launch "ongoing_transfers" async operations for both input and output streams. On each async callback, we launch @@ -108,7 +115,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n // Buffers read from async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); - buffers[allocated_buffers++] = current_buffer; + buffers[allocated_buffers++] = (allocated_buffer_t){ current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H }; + + // If the same buffer is used multiple times on async-io, to improve performance, it is recommended to + // pre-map it into the device. + status = hailo_device_dma_map_buffer(device, current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_D2H); + REQUIRE_SUCCESS(status, l_shutdown, "Failed map buffer with status=%d", status); status = hailo_stream_read_raw_buffer_async(output_streams[stream_index], current_buffer, frame_size, output_done_callback, output_streams[stream_index]); @@ -124,7 +136,12 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n // Buffers written to async operation must be page aligned. current_buffer = page_aligned_alloc(frame_size); REQUIRE_ACTION(INVALID_ADDR != current_buffer, status=HAILO_OUT_OF_HOST_MEMORY, l_shutdown, "allocation failed"); - buffers[allocated_buffers++] = current_buffer; + buffers[allocated_buffers++] = (allocated_buffer_t){ current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_H2D }; + + // If the same buffer is used multiple times on async-io, to improve performance, it is recommended to + // pre-map it into the device. + status = hailo_device_dma_map_buffer(device, current_buffer, frame_size, HAILO_DMA_BUFFER_DIRECTION_H2D); + REQUIRE_SUCCESS(status, l_shutdown, "Failed map buffer with status=%d", status); status = hailo_stream_write_raw_buffer_async(input_streams[stream_index], current_buffer, frame_size, input_done_callback, input_streams[stream_index]); @@ -138,11 +155,14 @@ static hailo_status infer(hailo_configured_network_group network_group, size_t n status = HAILO_SUCCESS; l_shutdown: // Calling hailo_shutdown_network_group will ensure that all async operations are done. All pending async I/O - // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks called with status=HAILO_STREAM_ABORT. 
(void) hailo_shutdown_network_group(network_group); // There are no async I/O operations ongoing so it is safe to free the buffers now. - for (i = 0; i < allocated_buffers; i++) page_aligned_free(buffers[i], frame_size); + for (i = 0; i < allocated_buffers; i++) { + (void) hailo_device_dma_unmap_buffer(device, buffers[i].addr, buffers[i].size, buffers[i].direction); + page_aligned_free(buffers[i].addr, buffers[i].size); + } return status; } @@ -239,7 +259,7 @@ int main() REQUIRE_SUCCESS(status, l_release_device, "Failed activate network group"); // Run infer. - status = infer(network_group, number_input_streams, input_streams, number_output_streams, output_streams, + status = infer(device, network_group, number_input_streams, input_streams, number_output_streams, output_streams, ongoing_transfers); REQUIRE_SUCCESS(status, l_deactivate, "Failed performing inference"); diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt index 2d4245eb..31ce2faf 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt index e71deee9..466b8e3d 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c index 8b290202..9a84f3f1 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c @@ -192,7 +192,7 @@ int main() write_thread_args_t write_args[HEF_COUNT][MAX_EDGE_LAYERS]; read_thread_args_t read_args[HEF_COUNT][MAX_EDGE_LAYERS]; - char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/multi_network_shortcut_net.hef", "hefs/shortcut_net.hef"}; + char HEF_FILES[HEF_COUNT][MAX_HEF_PATH_LEN] = {"hefs/shortcut_net_nv12.hef", "hefs/shortcut_net.hef"}; // Note: default batch_size is 0, which is not used in this example uint16_t batch_sizes[HEF_COUNT] = {BATCH_SIZE_1, BATCH_SIZE_2}; diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt index aab5aec0..0fab3bb5 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) 
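The C example above pre-maps each buffer with `hailo_device_dma_map_buffer` before reusing it across async transfers, and unmaps only after `hailo_shutdown_network_group` has canceled all pending I/O. Below is a minimal sketch of that lifecycle for a single input buffer; it is a hedged fragment rather than a full program, and it assumes `device`, `input_stream`, `frame_size` and `input_done_callback` are set up as in the full example:

```
// Sketch: map once, reuse across async writes, unmap only after shutdown.
void *buffer = page_aligned_alloc(frame_size);
if (INVALID_ADDR == buffer) {
    return HAILO_OUT_OF_HOST_MEMORY;
}

// Mapping up front means repeated transfers on this buffer skip the
// per-transfer mapping cost.
hailo_status status = hailo_device_dma_map_buffer(device, buffer, frame_size,
    HAILO_DMA_BUFFER_DIRECTION_H2D);
if (HAILO_SUCCESS != status) {
    page_aligned_free(buffer, frame_size);
    return status;
}

// The completion callback re-launches the next write on the same
// (already mapped) buffer, as in the example above.
status = hailo_stream_write_raw_buffer_async(input_stream, buffer, frame_size,
    input_done_callback, input_stream);

// ... later, once hailo_shutdown_network_group() guarantees no transfer is
// in flight, it is safe to unmap and free:
(void) hailo_device_dma_unmap_buffer(device, buffer, frame_size,
    HAILO_DMA_BUFFER_DIRECTION_H2D);
page_aligned_free(buffer, frame_size);
```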
find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt index 0ef434b9..b6657446 100644 --- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt index a39e2867..c0b31e4b 100644 --- a/hailort/libhailort/examples/cpp/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 3.0.0) add_subdirectory(vstreams_example) add_subdirectory(infer_pipeline_example) -add_subdirectory(async_infer_example) -add_subdirectory(async_infer_functionality_example) +add_subdirectory(async_infer_basic_example) +add_subdirectory(async_infer_advanced_example) add_subdirectory(raw_streams_example) add_subdirectory(multi_network_vstream_example) add_subdirectory(switch_network_groups_example) @@ -17,8 +17,8 @@ add_subdirectory(notification_callback_example) set(CPP_EXAMPLE_TARGETS cpp_vstreams_example cpp_infer_pipeline_example - cpp_async_infer_example - cpp_async_infer_functionality_example + cpp_async_infer_basic_example + cpp_async_infer_advanced_example cpp_raw_streams_example cpp_multi_network_vstream_example cpp_switch_network_groups_example @@ -39,4 +39,4 @@ if(NOT CMAKE_SYSTEM_NAME STREQUAL QNX) endif() add_custom_target(cpp_hailort_examples) -add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS}) \ No newline at end of file +add_dependencies(cpp_hailort_examples ${CPP_EXAMPLE_TARGETS}) diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt new file mode 100644 index 00000000..4b7789a8 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.17.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_advanced_example async_infer_advanced_example.cpp) +target_link_libraries(cpp_async_infer_advanced_example PRIVATE HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_advanced_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_advanced_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp similarity index 69% rename from hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp rename to hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp index e2a5e228..b5ed3df4 100644 ---
a/hailort/libhailort/examples/cpp/async_infer_functionality_example/async_infer_functionality_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp @@ -3,9 +3,10 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file async_infer_functionality_example.cpp - * This example demonstrates the Async Infer API usage with a specific model with multiple inputs and outputs + * @file async_infer_advanced_example.cpp + * This example demonstrates the Async Infer API usage with a specific model that has multi-planar input * and changes configurations of the streams. + * Multiple infer jobs are triggered, and waiting for the last one ensures that all the rest have completed as well. **/ #include "hailo/hailort.hpp" @@ -43,46 +44,67 @@ int main() return vdevice.status(); } - auto infer_model_exp = vdevice.value()->create_infer_model("hefs/multi_network_shortcut_net.hef"); + // Create infer model from HEF file. + auto infer_model_exp = vdevice.value()->create_infer_model("hefs/shortcut_net_nv12.hef"); if (!infer_model_exp) { std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; return infer_model_exp.status(); } auto infer_model = infer_model_exp.release(); - infer_model->input("multi_network_shortcut_net_scope1/input_layer_0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->output("multi_network_shortcut_net_scope1/shortcut0")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->input("multi_network_shortcut_net_scope2/input_layer_1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); - infer_model->output("multi_network_shortcut_net_scope2/shortcut1")->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + infer_model->output()->set_format_type(HAILO_FORMAT_TYPE_FLOAT32); + // Configure the infer model auto configured_infer_model = infer_model->configure(); if (!configured_infer_model) { std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; return configured_infer_model.status(); } - // We store buffers vector here as a guard for the memory. The buffer will be freed only after + // The buffers are stored here as a guard for the memory. The buffer will be freed only after // configured_infer_model is released. std::vector<std::shared_ptr<uint8_t>> buffer_guards; + // Create infer bindings auto bindings = configured_infer_model->create_bindings(); if (!bindings) { std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; return bindings.status(); } + // Set the input buffers of the bindings.
for (const auto &input_name : infer_model->get_input_names()) { size_t input_frame_size = infer_model->input(input_name)->get_frame_size(); - auto input_buffer = page_aligned_alloc(input_frame_size); - auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + + // Create the pix_buffer (NV12: a Y plane followed by an interleaved UV plane) + const auto Y_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 2 / 3); + const auto UV_PLANE_SIZE = static_cast<uint32_t>(input_frame_size * 1 / 3); + assert(Y_PLANE_SIZE + UV_PLANE_SIZE == input_frame_size); + auto y_plane_buffer = page_aligned_alloc(Y_PLANE_SIZE); + auto uv_plane_buffer = page_aligned_alloc(UV_PLANE_SIZE); + hailo_pix_buffer_t pix_buffer{}; + pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + pix_buffer.number_of_planes = 2; + // Y Plane + pix_buffer.planes[0].bytes_used = Y_PLANE_SIZE; + pix_buffer.planes[0].plane_size = Y_PLANE_SIZE; + pix_buffer.planes[0].user_ptr = reinterpret_cast<void*>(y_plane_buffer.get()); + // UV Plane + pix_buffer.planes[1].bytes_used = UV_PLANE_SIZE; + pix_buffer.planes[1].plane_size = UV_PLANE_SIZE; + pix_buffer.planes[1].user_ptr = reinterpret_cast<void*>(uv_plane_buffer.get()); + + auto status = bindings->input(input_name)->set_pix_buffer(pix_buffer); if (HAILO_SUCCESS != status) { std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; return status; } - buffer_guards.push_back(input_buffer); + buffer_guards.push_back(y_plane_buffer); + buffer_guards.push_back(uv_plane_buffer); } + // Set the output buffers of the bindings. for (const auto &output_name : infer_model->get_output_names()) { size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); auto output_buffer = page_aligned_alloc(output_frame_size); @@ -111,6 +133,7 @@ int main() std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; return job.status(); } + // detach() is called in order for jobs to run in parallel (and not one after the other) job->detach(); if (i == FRAMES_COUNT - 1) { @@ -124,6 +147,7 @@ int main() std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; return status; } - + + std::cout << "Inference finished successfully" << std::endl; return HAILO_SUCCESS; } diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt new file mode 100644 index 00000000..4ebb8599 --- /dev/null +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt @@ -0,0 +1,16 @@ +cmake_minimum_required(VERSION 3.0.0) + +find_package(HailoRT 4.17.0 EXACT REQUIRED) + +add_executable(cpp_async_infer_basic_example async_infer_basic_example.cpp) +target_link_libraries(cpp_async_infer_basic_example PRIVATE HailoRT::libhailort) + +if(WIN32) + target_compile_options(cpp_async_infer_basic_example PRIVATE + /DWIN32_LEAN_AND_MEAN + /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines its own) + /wd4201 /wd4251 + ) +endif() + +set_target_properties(cpp_async_infer_basic_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp similarity index 59% rename from hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp rename to hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp index 30c744de..ec78ac77
100644 --- a/hailort/libhailort/examples/cpp/async_infer_example/async_infer_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp @@ -3,8 +3,8 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file async_infer_example.cpp - * This example demonstrates the Async Infer API usage and assumes the model has only one input and output. + * @file async_infer_basic_example.cpp + * This example demonstrates the Async Infer API usage with a specific model. **/ #include "hailo/hailort.hpp" @@ -42,6 +42,7 @@ int main() return vdevice.status(); } + // Create infer model from HEF file. auto infer_model_exp = vdevice.value()->create_infer_model(HEF_FILE); if (!infer_model_exp) { std::cerr << "Failed to create infer model, status = " << infer_model_exp.status() << std::endl; @@ -49,45 +50,60 @@ } auto infer_model = infer_model_exp.release(); + // Configure the infer model auto configured_infer_model = infer_model->configure(); if (!configured_infer_model) { std::cerr << "Failed to create configured infer model, status = " << configured_infer_model.status() << std::endl; return configured_infer_model.status(); } + // The buffers are stored here as a guard for the memory. The buffer will be freed only after + // configured_infer_model is released. + std::vector<std::shared_ptr<uint8_t>> buffer_guards; + auto bindings = configured_infer_model->create_bindings(); if (!bindings) { std::cerr << "Failed to create infer bindings, status = " << bindings.status() << std::endl; return bindings.status(); } - size_t input_frame_size = infer_model->input()->get_frame_size(); - auto input_buffer = page_aligned_alloc(input_frame_size); - auto status = bindings->input()->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); - if (HAILO_SUCCESS != status) { - std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; - return status; + for (const auto &input_name : infer_model->get_input_names()) { + size_t input_frame_size = infer_model->input(input_name)->get_frame_size(); + auto input_buffer = page_aligned_alloc(input_frame_size); + auto status = bindings->input(input_name)->set_buffer(MemoryView(input_buffer.get(), input_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(input_buffer); } - size_t output_frame_size = infer_model->output()->get_frame_size(); - auto output_buffer = page_aligned_alloc(output_frame_size); - status = bindings->output()->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); - if (HAILO_SUCCESS != status) { - std::cerr << "Failed to set infer input buffer, status = " << status << std::endl; - return status; + for (const auto &output_name : infer_model->get_output_names()) { + size_t output_frame_size = infer_model->output(output_name)->get_frame_size(); + auto output_buffer = page_aligned_alloc(output_frame_size); + auto status = bindings->output(output_name)->set_buffer(MemoryView(output_buffer.get(), output_frame_size)); + if (HAILO_SUCCESS != status) { + std::cerr << "Failed to set infer output buffer, status = " << status << std::endl; + return status; + } + + buffer_guards.push_back(output_buffer); } + // Run the async infer job.
auto job = configured_infer_model->run_async(bindings.value()); if (!job) { std::cerr << "Failed to start async infer job, status = " << job.status() << std::endl; return job.status(); } - status = job->wait(std::chrono::milliseconds(1000)); + auto status = job->wait(std::chrono::milliseconds(1000)); if (HAILO_SUCCESS != status) { std::cerr << "Failed to wait for infer to finish, status = " << status << std::endl; return status; } - + + std::cout << "Inference finished successfully" << std::endl; return HAILO_SUCCESS; } diff --git a/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt deleted file mode 100644 index 650edb46..00000000 --- a/hailort/libhailort/examples/cpp/async_infer_example/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -find_package(HailoRT 4.16.2 EXACT REQUIRED) - -add_executable(cpp_async_infer_example async_infer_example.cpp) -target_link_libraries(cpp_async_infer_example PRIVATE HailoRT::libhailort) - -if(WIN32) - target_compile_options(cpp_async_infer_example PRIVATE - /DWIN32_LEAN_AND_MEAN - /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) - /wd4201 /wd4251 - ) -endif() - -set_target_properties(cpp_async_infer_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt deleted file mode 100644 index 06d4e340..00000000 --- a/hailort/libhailort/examples/cpp/async_infer_functionality_example/CMakeLists.txt +++ /dev/null @@ -1,16 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -find_package(HailoRT 4.16.2 EXACT REQUIRED) - -add_executable(cpp_async_infer_functionality_example async_infer_functionality_example.cpp) -target_link_libraries(cpp_async_infer_functionality_example PRIVATE HailoRT::libhailort) - -if(WIN32) - target_compile_options(cpp_async_infer_functionality_example PRIVATE - /DWIN32_LEAN_AND_MEAN - /DNOMINMAX # NOMINMAX is required in order to play nice with std::min/std::max (otherwise Windows.h defines it's own) - /wd4201 /wd4251 - ) -endif() - -set_target_properties(cpp_async_infer_functionality_example PROPERTIES CXX_STANDARD 14) diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt index eccb90be..9a55ec16 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt index 6913af9d..c5d1a80f 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) 
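Both async infer examples queue several frames with the same detach-then-wait pattern: every job is detached so it runs in parallel, and only the last job is awaited, which is sufficient because jobs complete in submission order. A condensed sketch of that pattern follows (a hedged fragment assuming `using namespace hailort;` plus `configured_infer_model`, `bindings` and `FRAMES_COUNT` set up as in the examples above; the `AsyncInferJob` type name follows the value returned by `run_async`):

```
// Sketch of the detach/wait job lifecycle used by the async infer examples.
AsyncInferJob last_job;
for (uint32_t i = 0; i < FRAMES_COUNT; i++) {
    auto job = configured_infer_model->run_async(bindings);
    if (!job) {
        return job.status();
    }
    // Detach so the job keeps running in parallel instead of being waited on
    // when the local handle goes out of scope.
    job->detach();
    if (i == FRAMES_COUNT - 1) {
        last_job = job.release();
    }
}
// Jobs complete in order, so waiting on the last one implies all are done.
auto status = last_job.wait(std::chrono::milliseconds(1000));
if (HAILO_SUCCESS != status) {
    return status;
}
```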
add_executable(cpp_multi_device_example multi_device_example.cpp) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt index 6f2ccbcf..a3b00bc7 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp) target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt index 433c6c4c..dac78781 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_multi_process_example multi_process_example.cpp) target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 index 884f7915..158f7741 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.ps1 @@ -5,7 +5,7 @@ Param( ) $max_processes_count = 8 -$first_hef="hefs\multi_network_shortcut_net.hef" +$first_hef="hefs\shortcut_net_nv12.hef" $second_hef="hefs\shortcut_net.hef" $executable_base_name="cpp_multi_process_example" $executable_name="$executable_base_name.exe" diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh index 7b7e6fd9..38933711 100755 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.sh @@ -1,6 +1,6 @@ #!/bin/bash -readonly first_hef="hefs/multi_network_shortcut_net.hef" +readonly first_hef="hefs/shortcut_net_nv12.hef" readonly second_hef="hefs/shortcut_net.hef" readonly max_processes_count=8 readonly default_processes_count=1 diff --git a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt index 8929fafb..b95cda57 100644 --- a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_notification_callback_example notification_callback_example.cpp) target_link_libraries(cpp_notification_callback_example 
PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt index 83f4dd5c..837e7c56 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.0.0) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_power_measurement_example power_measurement_example.cpp) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt index 1b03be65..9b30a355 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp index cbc99fad..c48cb153 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp @@ -71,8 +71,8 @@ Expected> configure_network_group(Device static void output_async_callback(const OutputStream::CompletionInfo &completion_info) { // Real applications can free the buffer or forward it to post-process/display. - if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { - // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORT != completion_info.status)) { + // We will get HAILO_STREAM_ABORT when activated_network_group is destructed. std::cerr << "Got an unexpected status on callback. status=" << completion_info.status << std::endl; } } @@ -80,13 +80,13 @@ static void output_async_callback(const OutputStream::CompletionInfo &completion static void input_async_callback(const InputStream::CompletionInfo &completion_info) { // Real applications can free the buffer or reuse it for next transfer. - if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORTED_BY_USER != completion_info.status)) { - // We will get HAILO_STREAM_ABORTED_BY_USER when activated_network_group is destructed. + if ((HAILO_SUCCESS != completion_info.status) && (HAILO_STREAM_ABORT != completion_info.status)) { + // We will get HAILO_STREAM_ABORT when activated_network_group is destructed. std::cerr << "Got an unexpected status on callback. 
status=" << completion_info.status << std::endl; } } -static hailo_status infer(ConfiguredNetworkGroup &network_group) +static hailo_status infer(Device &device, ConfiguredNetworkGroup &network_group) { // Assume one input and output auto &output = network_group.get_output_streams()[0].get(); @@ -101,6 +101,16 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) auto output_buffer = page_aligned_alloc(output.get_frame_size()); auto input_buffer = page_aligned_alloc(input.get_frame_size()); + // If the same buffer is used multiple times for async I/O, it is recommended to pre-map it + // into the device to improve performance. The DmaMappedBuffer object manages the mapping, and it'll be unmapped when it is destroyed. + // Notice that the buffer must be alive as long as the mapping is alive, so we define the mapping after the buffers. + auto output_mapping = DmaMappedBuffer::create(device, output_buffer.get(), output.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_D2H); + auto input_mapping = DmaMappedBuffer::create(device, input_buffer.get(), input.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D); + if (!output_mapping || !input_mapping) { + std::cerr << "Failed to map buffer with status=" << input_mapping.status() << ", " << output_mapping.status() << std::endl; + return HAILO_INTERNAL_FAILURE; + } + std::atomic<hailo_status> output_status(HAILO_UNINITIALIZED); std::thread output_thread([&]() { while (true) { @@ -127,16 +137,16 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) std::this_thread::sleep_for(std::chrono::seconds(5)); // Calling shutdown on a network group will ensure that all async operations are done. All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORT. // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async // callback lambda. network_group.shutdown(); - // Thread should be stopped with HAILO_STREAM_ABORTED_BY_USER status. + // Thread should be stopped with HAILO_STREAM_ABORT status.
output_thread.join(); input_thread.join(); - if ((HAILO_STREAM_ABORTED_BY_USER != output_status) || (HAILO_STREAM_ABORTED_BY_USER != input_status)) { + if ((HAILO_STREAM_ABORT != output_status) || (HAILO_STREAM_ABORT != input_status)) { std::cerr << "Got unexpected statuses from thread: " << output_status << ", " << input_status << std::endl; return HAILO_INTERNAL_FAILURE; } @@ -165,7 +175,7 @@ int main() return EXIT_FAILURE; } - auto status = infer(*network_group.value()); + auto status = infer(*device.value(), *network_group.value()); if (HAILO_SUCCESS != status) { return EXIT_FAILURE; } diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt index cf7e24d5..c132b53c 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp index e402a8dd..d86f9f72 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp @@ -61,11 +61,11 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) case HAILO_SUCCESS: // Real applications can forward the buffer to post-process/display. Here we just re-launch new async read. status = output.read_async(completion_info.buffer_addr, completion_info.buffer_size, read_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { std::cerr << "Failed read async with status=" << status << std::endl; } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -80,11 +80,11 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) // Real applications may free the buffer and replace it with a new buffer ready to be sent. Here we just // re-launch new async write. status = input.write_async(completion_info.buffer_addr, completion_info.buffer_size, write_done); - if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORTED_BY_USER != status)) { + if ((HAILO_SUCCESS != status) && (HAILO_STREAM_ABORT != status)) { std::cerr << "Failed write async with status=" << status << std::endl; } break; - case HAILO_STREAM_ABORTED_BY_USER: + case HAILO_STREAM_ABORT: // Transfer was canceled, finish gracefully. break; default: @@ -121,7 +121,7 @@ static hailo_status infer(ConfiguredNetworkGroup &network_group) std::this_thread::sleep_for(std::chrono::seconds(5)); // Calling shutdown on a network group will ensure that all async operations are done.
All pending - // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORTED_BY_USER. + // operations will be canceled and their callbacks will be called with status=HAILO_STREAM_ABORT. // Only after the shutdown is called, we can safely free the buffers and any variable captured inside the async // callback lambda. network_group.shutdown(); diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt index 709110a7..6cf42fd0 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_raw_streams_example raw_streams_example.cpp) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt index 1b3a6895..115feac1 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp) target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt index 8b5c1f22..9f4ac2bc 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp) target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt index f0a8ad08..fddc3b99 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.16.2 EXACT REQUIRED) +find_package(HailoRT 4.17.0 EXACT REQUIRED) add_executable(cpp_vstreams_example vstreams_example.cpp) target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/hef.proto b/hailort/libhailort/hef.proto index 52504937..59625f86 100644 --- a/hailort/libhailort/hef.proto +++ b/hailort/libhailort/hef.proto @@ -51,6 +51,8 @@ enum 
ProtoHEFExtensionType { OUTPUT_SCALE_PER_FEATURE = 25; PERIPH_CALCULATION_IN_HAILORT = 26; HAILO_NET_FLOW_YOLOV8_NMS = 27; + BATCH_REGISTER_CONFIG = 28; + HAILO_NET_FLOW_BBOX_DECODING = 29; UNUSED = 0XFFFF; } @@ -78,7 +80,7 @@ message ProtoHEFHeader { uint64 version = 4; } -// Enum describing the different possible hw_archs +// Enum describing the different possible hw_archs. Must be aligned to device_internal::HEFHwArch enum ProtoHEFHwArch { PROTO__HW_ARCH__HAILO8 = 0; PROTO__HW_ARCH__HAILO8P = 1; @@ -291,6 +293,9 @@ message ProtoHEFNmsOp { // Index of background class for background removal uint32 background_removal_index = 6; + // Whether the op contains bbox decoding only + bool bbox_decoding_only = 13; + // Additional information needed for specific NMS types oneof nms_op { ProtoHEFYoloNmsOp yolo_nms_op = 7; // YOLOv5 post process @@ -574,6 +579,7 @@ message ProtoHEFAction { ProtoHEFActionDebugSleep debug_sleep = 12; ProtoHEFActionEnableNMS enable_nms = 13; ProtoHEFActionWriteDataByType write_data_by_type = 14; + ProtoHEFActionSwitchLcuBatch switch_lcu_batch = 15; } } @@ -690,7 +696,7 @@ message ProtoHEFActionEnableLcu { // Address at lcu to mark as complete upon reach (after lcu_kernel_done_count times) uint32 lcu_kernel_done_address = 3; - // Amount of times lcu_kernel_done_address should be reached before marking is done + // Amount of times lcu_kernel_done_address should be visited before done uint32 lcu_kernel_done_count = 4; // Address to indicate where the FW should write to @@ -701,6 +707,18 @@ message ProtoHEFActionEnableLcu { uint32 network_index = 6; } +message ProtoHEFActionSwitchLcuBatch { + // Index of the lcu + uint32 lcu_index = 1; + + // Index of the cluster of the lcu + uint32 cluster_index = 2; + + // network index - name given by networks_names + // in ProtoHEFNetworkGroup + uint32 network_index = 6; +} + message ProtoHEFActionEnableNMS { // Index of the nms unit uint32 nms_unit_index = 1; diff --git a/hailort/libhailort/include/hailo/buffer.hpp b/hailort/libhailort/include/hailo/buffer.hpp index 1b647f95..08c6b136 100644 --- a/hailort/libhailort/include/hailo/buffer.hpp +++ b/hailort/libhailort/include/hailo/buffer.hpp @@ -11,7 +11,6 @@ #define _HAILO_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "hailo/buffer_storage.hpp" #include #include @@ -24,9 +23,25 @@ namespace hailort { +class BufferStorage; +using BufferStoragePtr = std::shared_ptr<BufferStorage>; + class Buffer; using BufferPtr = std::shared_ptr<Buffer>; + +/*! Buffer storage parameters.
Analogous to hailo_buffer_parameters_t */ +struct HAILORTAPI BufferStorageParams +{ +public: + + static BufferStorageParams create_dma(); + // Defaults to heap params + BufferStorageParams(); + + hailo_buffer_flags_t flags; +}; + class HAILORTAPI Buffer final { public: @@ -50,9 +65,7 @@ class HAILORTAPI Buffer final // Empty buffer (points to null, size is zero) Buffer(); - // Buffer backed by the storage param - Buffer(BufferStoragePtr storage); - ~Buffer() = default; + ~Buffer(); Buffer(const Buffer& other) = delete; Buffer& operator=(const Buffer& other) = delete; @@ -156,9 +169,20 @@ class HAILORTAPI Buffer final uint32_t& as_uint32(); uint64_t& as_uint64(); + // Returns the pointer managed by this object and releases ownership + Expected<void *> release() noexcept; + + // Internal functions + static Expected<Buffer> create(BufferStoragePtr storage, bool register_storage = true); + private: + class StorageImpl; + + // Buffer backed by the storage param + Buffer(std::unique_ptr<StorageImpl> storage); + + // Initialization dependency - BufferStoragePtr m_storage; + std::unique_ptr<StorageImpl> m_storage_impl; uint8_t *m_data; size_t m_size; }; diff --git a/hailort/libhailort/include/hailo/buffer_storage.hpp b/hailort/libhailort/include/hailo/buffer_storage.hpp deleted file mode 100644 index ce227e48..00000000 --- a/hailort/libhailort/include/hailo/buffer_storage.hpp +++ /dev/null @@ -1,285 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file buffer_storage.hpp - * @brief TODO: fill me (HRT-10026) - **/ - -#ifndef _HAILO_BUFFER_STORAGE_HPP_ -#define _HAILO_BUFFER_STORAGE_HPP_ - -#include "hailo/hailort.h" -#include "hailo/expected.hpp" - -#include -#include -#include -#include -#include -#include - - -/** hailort namespace */ -namespace hailort -{ - -// Forward declarations -class Device; -class VDevice; -class VdmaDevice; -class BufferStorage; -class HeapStorage; -class DmaStorage; -class UserBufferStorage; -class HailoRTDriver; -class Buffer; - -namespace vdma { - class DmaAbleBuffer; - using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>; - - class MappedBuffer; - using MappedBufferPtr = std::shared_ptr<MappedBuffer>; -} - - -/*! Buffer storage parameters.
Analogical to hailo_buffer_parameters_t */ -struct HAILORTAPI BufferStorageParams -{ -public: - struct HeapParams - { - public: - HeapParams(); - }; - - struct DmaMappingParams - { - public: - static Expected create(const hailo_buffer_dma_mapping_params_t ¶ms); - // DmaMappingParams for a buffer to be mapped to device - DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction); - // DmaMappingParams for a buffer to be mapped to all the underlying devices held by vdevice - DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); - // DmaMappingParams for a buffer to be lazily mapped upon it's first async transfer to a given device - DmaMappingParams(); - - // Note: We hold a pointer to a Device/VDevice/neither, since DmaMappingParams support mapping to - // a device, vdevice or lazy mapping - Device *device; - VDevice *vdevice; - hailo_dma_buffer_direction_t data_direction; - - private: - DmaMappingParams(const hailo_buffer_dma_mapping_params_t ¶ms); - }; - - static Expected create(const hailo_buffer_parameters_t ¶ms); - // Dma buffer params for lazy mapping - static BufferStorageParams create_dma(); - // Dma buffer params for mapping to device in data_direction - static BufferStorageParams create_dma(Device &device, hailo_dma_buffer_direction_t data_direction); - // Dma buffer params for mapping to vdevice in data_direction - static BufferStorageParams create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction); - - // Defaults to heap params - BufferStorageParams(); - - hailo_buffer_flags_t flags; - union { - HeapParams heap_params; - DmaMappingParams dma_mapping_params; - }; -}; - -using BufferStoragePtr = std::shared_ptr; - -class HAILORTAPI BufferStorage -{ -public: - enum class Type { - HEAP, - DMA, - USER_BUFFER - }; - - static Expected create(size_t size, const BufferStorageParams ¶ms); - - BufferStorage(BufferStorage&& other) noexcept = default; - BufferStorage(const BufferStorage &) = delete; - BufferStorage &operator=(BufferStorage &&) = delete; - BufferStorage &operator=(const BufferStorage &) = delete; - virtual ~BufferStorage() = default; - - Type type() const; - virtual size_t size() const = 0; - virtual void *user_address() = 0; - // Returns the pointer managed by this object and releases ownership - // TODO: Add a free function pointer? (HRT-10024) - // // Free the returned pointer with `delete` - // TODO: after release the containing buffer will hold pointers to values that were released. - // Document that this can happen? Disable this behavior somehow? (HRT-10024) - virtual Expected release() noexcept = 0; - // Maps the storage to device in data_direction. - // - If the mapping is new - true is returned. - // - If the mapping already exists - false is returned. - // - Otherwise - Unexpected with a failure status is returned. - // Note: This buffer storage must be destroyed before the device it is mapped to is destroyed! 
- // Failing to do so will lead to unexpected results - // TODO: resolve this issue (HRT-12361) - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) = 0; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) = 0; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) = 0; - -protected: - explicit BufferStorage(Type type); - - const Type m_type; -}; - -using HeapStoragePtr = std::shared_ptr; - -class HAILORTAPI HeapStorage : public BufferStorage -{ -public: - static Expected create(size_t size); - HeapStorage(std::unique_ptr data, size_t size); - HeapStorage(HeapStorage&& other) noexcept; - HeapStorage(const HeapStorage &) = delete; - HeapStorage &operator=(HeapStorage &&) = delete; - HeapStorage &operator=(const HeapStorage &) = delete; - virtual ~HeapStorage() = default; - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - -private: - std::unique_ptr m_data; - size_t m_size; -}; - -// ************************************* NOTE - START ************************************* // -// DmaStorage isn't currently supported and is for internal use only // -// **************************************************************************************** // -using DmaStoragePtr = std::shared_ptr; - -// TODO: HRT-10026 doc this -class HAILORTAPI DmaStorage : public BufferStorage -{ -public: - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer isn't mapped to dma until dma_map is called. - static Expected create(size_t size); - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer is mapped to device in data_direction. - static Expected create(size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device); - // Creates a DmaStorage instance holding a dma-able buffer size bytes large. - // The buffer is mapped to vdevice.get_physical_devices() in data_direction. - static Expected create(size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice); - - // TODO: doc that the addr needs to be on a new page and aligned to 64B (HRT-9559) - // probably best just to call mmap - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer isn't mapped to dma until dma_map is called. - static Expected create_from_user_address(void *user_address, size_t size); - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer is mapped to device in data_direction. - static Expected create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device); - // Creates a DmaStorage instance backed by the size bytes large buffer pointed to by user_address. - // The buffer is mapped to vdevice.get_physical_devices() in data_direction. 
- static Expected create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &device); - // Creates a DMA-able buffer from given user buffer at address given of size length if possible, - // Otherwise allocates new one length of size - static Expected> create_dma_able_buffer_from_user_size(void *addr, size_t size); - - DmaStorage(const DmaStorage &other) = delete; - DmaStorage &operator=(const DmaStorage &other) = delete; - DmaStorage(DmaStorage &&other) noexcept = default; - DmaStorage &operator=(DmaStorage &&other) = delete; - virtual ~DmaStorage(); - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - // TODO: thread safety (HRT-10669) - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer); - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - -private: - // - Creates a backing DmaAbleBuffer: - // - If user_address is null, it'll be allocated by hailort - // - Otherwise, it'll be a non owning wrapper of the user's buffer - // - The said buffer is mapped physical_devices in data_direction. - // - By default (if physical_devices is empty), no mapping will occur - static Expected create(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction = HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM, - std::vector> &&physical_devices = {}); - - // Initialization dependency - vdma::DmaAbleBufferPtr m_dma_able_buffer; - // For each device (key is device_id), we store some vdma mapping. - // TODO: use (device_id, direction) as key or have two dicts (HRT-10656) - using UnmappingCallback = std::function; - std::unordered_map> m_mappings; -}; - - -using UserBufferStoragePtr = std::shared_ptr; -class HAILORTAPI UserBufferStorage : public BufferStorage -{ -public: - static Expected create(void *user_address, const size_t size); - - UserBufferStorage(void *user_address, const size_t size); - UserBufferStorage(const UserBufferStorage &other) = delete; - UserBufferStorage &operator=(const UserBufferStorage &other) = delete; - UserBufferStorage(UserBufferStorage &&other) noexcept = default; - UserBufferStorage &operator=(UserBufferStorage &&other) = delete; - virtual ~UserBufferStorage() = default; - - virtual size_t size() const override; - virtual void *user_address() override; - virtual Expected release() noexcept override; - virtual Expected dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) override; - virtual Expected dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) override; - - // Internal functions - virtual Expected get_dma_mapped_buffer(const std::string &device_id) override; - - // Craete storage for user buffer to store mappings. Used internally not by the user. 
-    static Expected> create_storage_from_user_buffer(void *addr, size_t size);
-
-private:
-
-    void * m_user_address;
-    const size_t m_size;
-
-    using UnmappingCallback = std::function;
-    std::unordered_map> m_mappings;
-};
-
-// ************************************** NOTE - END ************************************** //
-// DmaStorage isn't currently supported and is for internal use only                        //
-// **************************************************************************************** //
-
-} /* namespace hailort */
-
-#endif /* _HAILO_BUFFER_STORAGE_HPP_ */
diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp
index 11097919..88f0959f 100644
--- a/hailort/libhailort/include/hailo/device.hpp
+++ b/hailort/libhailort/include/hailo/device.hpp
@@ -34,14 +34,6 @@ namespace hailort
 class Device;
 using NotificationCallback = std::function;
-namespace vdma {
-    class DmaAbleBuffer;
-    using DmaAbleBufferPtr = std::shared_ptr;
-
-    class MappedBuffer;
-    using MappedBufferPtr = std::shared_ptr;
-}
-
 /** @} */ // end of group_type_definitions
 /*! Represents the Hailo device (chip). */
@@ -700,42 +692,44 @@ class HAILORTAPI Device
     */
    virtual bool is_stream_interface_supported(const hailo_stream_interface_t &stream_interface) const = 0;
-    // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. Just like we did for
-    // InputStream::write_async and OutputStream::read_async (HRT-11039)
    /**
     * Maps the buffer pointed to by @a address for DMA transfers to/from this device, in the specified
-     * @a direction.
-     * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or
-     * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times
-     * across different async operations.
-     * - For buffers that will be written to the device via `InputStream::write_async()`, use `HAILO_H2D_STREAM`
-     *   for the @a direction parameter.
-     * - For buffers that will be read from the device via `OutputStream::read_async()`, use `HAILO_D2H_STREAM`
-     *   for the @a direction parameter.
-     *
-     * @param[in] address       The address of the buffer to be mapped
-     * @param[in] size          The buffer's size in bytes
-     * @param[in] direction     The direction of the mapping
+     * @a direction.
+     * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+     * apparent when the buffer is reused multiple times across different async operations.
+     *
+     * For high level API (aka InferModel), buffers bound using ConfiguredInferModel::Bindings::InferStream::set_buffer
+     * can be mapped.
+     *
+     * For low level API (aka InputStream/OutputStream), buffers passed to InputStream::write_async and
+     * OutputStream::read_async can be mapped.
+     *
+     * @param[in] address       The address of the buffer to be mapped.
+     * @param[in] size          The buffer's size in bytes.
+     * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+     *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
-     * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a direction, or when the
-     *       @a Device object is destroyed.
-     * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a Device
+     *
+     * @note The DMA mapping will be released upon calling dma_unmap() with @a address, @a size and @a direction, or
+     *       when the @a Device object is destroyed.
+     * @note The buffer pointed to by @a address cannot be released until it is unmapped (via dma_unmap() or @a Device
     *       destruction).
     */
-    virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction);
+    virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction);
    /**
     * Un-maps a buffer buffer pointed to by @a address for DMA transfers to/from this device, in the direction
     * @a direction.
     *
-     * @param[in] address       The address of the buffer to be un-mapped
-     * @param[in] direction     The direction of the mapping
+     * @param[in] address       The address of the buffer to be un-mapped.
+     * @param[in] size          The buffer's size in bytes.
+     * @param[in] direction     The direction of the mapping.
+     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
     */
-    virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction);
+    virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction);
-    virtual Expected> try_dma_map(vdma::DmaAbleBufferPtr buffer,
-        hailo_stream_direction_t direction);
    virtual hailo_status direct_write_memory(uint32_t address, const void *buffer, uint32_t size);
    virtual hailo_status direct_read_memory(uint32_t address, void *buffer, uint32_t size);
    hailo_status set_overcurrent_state(bool should_activate);
@@ -745,12 +739,12 @@ class HAILORTAPI Device
    // The sum of the number of contexts will fit in uint8_t
    Expected> get_number_of_dynamic_contexts_per_network_group();
    Expected download_context_action_list(uint32_t network_group_id, uint8_t context_type,
-        uint8_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size = 10000);
+        uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size = 10000);
    // The batch configured is reset between network groups
    hailo_status set_context_action_list_timestamp_batch(uint16_t batch_index);
    hailo_status set_context_switch_breakpoint(uint8_t breakpoint_id, bool break_at_any_network_group_index,
        uint8_t network_group_index, bool break_at_any_batch_index, uint16_t batch_index, bool break_at_any_context_index,
-        uint8_t context_index, bool break_at_any_action_index, uint16_t action_index);
+        uint16_t context_index, bool break_at_any_action_index, uint16_t action_index);
    hailo_status continue_context_switch_breakpoint(uint8_t breakpoint_id);
    hailo_status clear_context_switch_breakpoint(uint8_t breakpoint_id);
    Expected get_context_switch_breakpoint_status(uint8_t breakpoint_id);
diff --git a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp
new file mode 100644
index 00000000..3eb8c69c
--- /dev/null
+++ b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp
@@ -0,0 +1,95 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_mapped_buffer.hpp
+ * @brief Object that keeps DMA mapping to some device/vdevice alive.
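To make the reworked mapping lifecycle above concrete, here is a minimal sketch of pre-mapping an input buffer around `InputStream::write_async()`. It assumes an already-open `Device` and `InputStream`, a hypothetical `fill_with_input_data()` helper, and the async completion-info callback type; it is an illustration, not part of the patch.

```cpp
#include "hailo/hailort.hpp"
#include <vector>

using namespace hailort;

// fill_with_input_data() is a hypothetical application helper, not part of HailoRT.
void fill_with_input_data(std::vector<uint8_t> &frame);

hailo_status stream_pre_mapped_frames(Device &device, InputStream &input_stream, size_t frames_count)
{
    std::vector<uint8_t> frame(input_stream.get_frame_size());

    // Map once up front; every write_async() below reuses the same mapping.
    auto status = device.dma_map(frame.data(), frame.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    for (size_t i = 0; i < frames_count; i++) {
        fill_with_input_data(frame);
        status = input_stream.write_async(frame.data(), frame.size(),
            [](const InputStream::CompletionInfo &info) {
                (void)info; // info.status is HAILO_SUCCESS or HAILO_STREAM_ABORT; keep callbacks short
            });
        if (HAILO_SUCCESS != status) {
            break;
        }
        // A real application must not touch `frame` again until the callback fires;
        // the required synchronization is omitted here for brevity.
    }

    // Unmap with the same address, size and direction that were mapped.
    return device.dma_unmap(frame.data(), frame.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
}
```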
+ **/ + +#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ +#define _HAILO_DMA_MAPPED_BUFFER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/vdevice.hpp" +#include "hailo/device.hpp" +#include "hailo/expected.hpp" + +namespace hailort +{ + +/*! + * \class DmaMappedBuffer + * \brief A wrapper class for mapping and unmapping buffers using VDevice::dma_map and VDevice::dma_unmap (or their + * variants for Device). + * + * The DmaMappedBuffer class provides a convenient way to keep a DMA mapping on a buffer active. + * It encapsulates the functionality of mapping and unmapping buffers using VDevice::dma_map and + * VDevice::dma_unmap, as well as their variants for Device. + * + * \note The buffer pointed to by address cannot be released until this object is destroyed. + * + * Example: + * \code{.cpp} + * // Create a DmaMappedBuffer object for a VDevice + * void* user_address = ...; + * size_t size = ...; + * hailo_dma_buffer_direction_t direction = ...; + * Expected mapped_buffer = DmaMappedBuffer::create(vdevice, user_address, size, direction); + * if (!mapped_buffer.has_value()) { + * // Handle error + * } else { + * // Use the mapped buffer + * } + * \endcode + */ +class HAILORTAPI DmaMappedBuffer final { +public: + /** + * Creates a DmaMappedBuffer object for a VDevice. + * + * @param vdevice The VDevice object to use for mapping the buffer. + * @param user_address The user address of the buffer to be mapped. + * @param size The size of the buffer to be mapped. + * @param direction The direction of the DMA transfer. + * + * @return An Expected object containing the created DmaMappedBuffer on success, or an error on failure. + */ + static Expected create(VDevice &vdevice, void *user_address, size_t size, + hailo_dma_buffer_direction_t direction); + + /** + * Creates a DmaMappedBuffer object for a Device. + * + * @param device The Device object to use for mapping the buffer. + * @param user_address The user address of the buffer to be mapped. + * @param size The size of the buffer to be mapped. + * @param direction The direction of the DMA transfer. + * + * @return An Expected object containing the created DmaMappedBuffer on success, or an error on failure. + */ + static Expected create(Device &device, void *user_address, size_t size, + hailo_dma_buffer_direction_t direction); + + /** + * The destructor automatically unmaps the buffer. 
+ */ + ~DmaMappedBuffer(); + + DmaMappedBuffer(const DmaMappedBuffer &) = delete; + DmaMappedBuffer &operator=(const DmaMappedBuffer &) = delete; + + + DmaMappedBuffer(DmaMappedBuffer &&other); + DmaMappedBuffer &operator=(DmaMappedBuffer &&other); + +private: + class Impl; + explicit DmaMappedBuffer(std::unique_ptr impl); + + std::unique_ptr m_impl; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ diff --git a/hailort/libhailort/include/hailo/event.hpp b/hailort/libhailort/include/hailo/event.hpp index 1dc82611..c013ea78 100644 --- a/hailort/libhailort/include/hailo/event.hpp +++ b/hailort/libhailort/include/hailo/event.hpp @@ -129,7 +129,7 @@ class HAILORTAPI Semaphore : public Waitable using Waitable::Waitable; static Expected create(uint32_t initial_count); - static SemaphorePtr create_shared(uint32_t initial_count); + static Expected create_shared(uint32_t initial_count); virtual hailo_status signal() override; virtual bool is_auto_reset() override; diff --git a/hailort/libhailort/include/hailo/expected.hpp b/hailort/libhailort/include/hailo/expected.hpp index d911539d..13dcaff2 100644 --- a/hailort/libhailort/include/hailo/expected.hpp +++ b/hailort/libhailort/include/hailo/expected.hpp @@ -186,6 +186,8 @@ class Unexpected final m_status(status) {} + operator hailo_status() { return m_status; } + hailo_status m_status; }; @@ -217,6 +219,17 @@ class Expected final template friend class Expected; + /** + * Construct a new Expected from an Unexpected status. + * + * NOTE: Asserting that status is not HAILO_SUCCESS if NDEBUG is not defined. + */ + Expected(Unexpected unexpected) : + m_status(unexpected.m_status) + { + assert(unexpected.m_status != HAILO_SUCCESS); + } + /** * Default constructor * @@ -335,17 +348,6 @@ class Expected final m_status(HAILO_SUCCESS) {} - /** - * Construct a new Expected from an Unexpected status. - * - * NOTE: Asserting that status is not HAILO_SUCCESS if NDEBUG is not defined. 
- */ - Expected(const Unexpected &unexpected) : - m_status(unexpected.m_status) - { - assert(unexpected.m_status != HAILO_SUCCESS); - } - Expected& operator=(const Expected &other) = delete; Expected& operator=(Expected &&other) noexcept = delete; Expected& operator=(const T &other) = delete; diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h index d218b229..66ff9451 100644 --- a/hailort/libhailort/include/hailo/hailort.h +++ b/hailort/libhailort/include/hailo/hailort.h @@ -76,6 +76,8 @@ extern "C" { #define HAILO_SCHEDULER_PRIORITY_MIN (0) #define MAX_NUMBER_OF_PLANES (4) +#define NUMBER_OF_PLANES_NV12_NV21 (2) +#define NUMBER_OF_PLANES_I420 (3) typedef float float32_t; typedef double float64_t; @@ -145,8 +147,8 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(59, HAILO_THREAD_NOT_ACTIVATED /*!< The given thread has not been activated */)\ HAILO_STATUS__X(60, HAILO_THREAD_NOT_JOINABLE /*!< The given thread is not joinable */)\ HAILO_STATUS__X(61, HAILO_NOT_FOUND /*!< Could not find element */)\ - HAILO_STATUS__X(62, HAILO_STREAM_ABORTED_BY_HW /*!< Stream aborted due to an external event */)\ - HAILO_STATUS__X(63, HAILO_STREAM_ABORTED_BY_USER /*!< Stream recv/send was aborted */)\ + HAILO_STATUS__X(62, HAILO_RESERVED_STATUS /*!< Reserved for future use */)\ + HAILO_STATUS__X(63, HAILO_STREAM_ABORT /*!< Stream recv/send was aborted */)\ HAILO_STATUS__X(64, HAILO_PCIE_DRIVER_NOT_INSTALLED /*!< Pcie driver is not installed */)\ HAILO_STATUS__X(65, HAILO_NOT_AVAILABLE /*!< Component is not available */)\ HAILO_STATUS__X(66, HAILO_TRAFFIC_CONTROL_FAILURE /*!< Traffic control failure */)\ @@ -167,6 +169,7 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(81, HAILO_OUT_OF_HOST_CMA_MEMORY /*!< Cannot allocate more CMA memory at host */)\ HAILO_STATUS__X(82, HAILO_QUEUE_IS_FULL /*!< Cannot push more items into the queue */)\ HAILO_STATUS__X(83, HAILO_DMA_MAPPING_ALREADY_EXISTS /*!< DMA mapping already exists */)\ + HAILO_STATUS__X(84, HAILO_CANT_MEET_BUFFER_REQUIREMENTS /*!< can't meet buffer requirements */)\ typedef enum { #define HAILO_STATUS__X(value, name) name = value, @@ -180,8 +183,7 @@ typedef enum { HAILO_STATUS_MAX_ENUM = HAILO_MAX_ENUM } hailo_status; -#define HAILO_STREAM_ABORTED HAILO_STREAM_ABORTED_BY_HW /* 'HAILO_STREAM_ABORTED' is deprecated. One should use 'HAILO_STREAM_ABORTED_BY_HW' */ -#define HAILO_STREAM_INTERNAL_ABORT HAILO_STREAM_ABORTED_BY_USER /* 'HAILO_STREAM_INTERNAL_ABORT' is deprecated. One should use 'HAILO_STREAM_ABORTED_BY_USER' */ +#define HAILO_STREAM_ABORTED_BY_USER HAILO_STREAM_ABORT /* 'HAILO_STREAM_ABORTED_BY_USER' is deprecated. 
One should use 'HAILO_STREAM_ABORT' */
 /** HailoRT library version */
 typedef struct {
@@ -632,8 +634,8 @@ typedef enum {
 * For each class (::hailo_nms_shape_t.number_of_classes), the layout is
 *      \code
 *      struct (packed) {
- *          uint16_t/float32_t bbox_count;
- *          hailo_bbox_t/hailo_bbox_float32_t bbox[bbox_count];
+ *          float32_t bbox_count;
+ *          hailo_bbox_float32_t bbox[bbox_count];
 *      };
 *      \endcode
 *
@@ -815,20 +817,24 @@ typedef enum {
    HAILO_STREAM_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_stream_flags_t;
-// ************************************* NOTE - START ************************************* //
-// Dma buffer allocation isn't currently supported and is for internal use only             //
-// **************************************************************************************** //
-// TODO: remove hailo_dma_buffer_direction_t (HRT-12391)
 /** Hailo dma buffer direction */
 typedef enum {
+    /** Buffers sent from the host (H) to the device (D). Used for input streams */
    HAILO_DMA_BUFFER_DIRECTION_H2D = 0,
+
+    /** Buffers received from the device (D) to the host (H). Used for output streams */
    HAILO_DMA_BUFFER_DIRECTION_D2H = 1,
+
+    /** Buffers that can be used both to send data to the device and to receive data from it */
    HAILO_DMA_BUFFER_DIRECTION_BOTH = 2,
    /** Max enum value to maintain ABI Integrity */
    HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_dma_buffer_direction_t;
+// ************************************* NOTE - START ************************************* //
+// Dma buffer allocation isn't currently supported and is for internal use only             //
+// **************************************************************************************** //
 /** Hailo buffer flags */
 typedef enum {
    HAILO_BUFFER_FLAGS_NONE = 0,        /*!< No flags - heap allocated buffer */
@@ -838,31 +844,9 @@ typedef enum {
    HAILO_BUFFER_FLAGS_MAX_ENUM = HAILO_MAX_ENUM
 } hailo_buffer_flags_t;
-/** Hailo buffer heap parameters */
-typedef struct {
-    EMPTY_STRUCT_PLACEHOLDER
-} hailo_buffer_heap_params_t;
-
-// Hailo buffer dma mapping parameters.
-// - If device is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to the device.
-// - If vdevice is not NULL, the resulting buffer created by hailo_allocate_buffer will be mapped to all the
-//   underlying devices held be vdevice.
-// - If both device and vdevice are null, the resulting buffer created by hailo_allocate_buffer will be lazily
-//   mapped upon the first async transfer (i.e. when the buffer is passed to hailo_stream_read_raw_buffer_async
-//   or hailo_stream_write_raw_buffer_async).
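The `DmaMappedBuffer` wrapper introduced earlier in this patch ties such a mapping to object lifetime, using the direction enum just shown. A minimal RAII sketch, assuming an already-created `vdevice` and a host buffer reused across async input transfers:

```cpp
#include "hailo/hailort.hpp"
#include <vector>

using namespace hailort;

hailo_status with_mapped_buffer(VDevice &vdevice, std::vector<uint8_t> &buffer)
{
    // The mapping stays alive for as long as `mapped` is alive; the destructor unmaps.
    auto mapped = DmaMappedBuffer::create(vdevice, buffer.data(), buffer.size(),
        HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (!mapped.has_value()) {
        return mapped.status();
    }

    // ... run async inferences that reuse `buffer` while `mapped` is alive ...

    return HAILO_SUCCESS;
    // Leaving scope destroys `mapped`, which unmaps the buffer automatically.
}
```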
-typedef struct {
-    hailo_device device;
-    hailo_vdevice vdevice;
-    hailo_dma_buffer_direction_t direction;
-} hailo_buffer_dma_mapping_params_t;
-
 /** Hailo buffer parameters */
 typedef struct {
    hailo_buffer_flags_t flags;
-    union {
-        hailo_buffer_heap_params_t heap_params;
-        hailo_buffer_dma_mapping_params_t dma_mapping_params;
-    };
 } hailo_buffer_parameters_t;
 // ************************************** NOTE - END ************************************** //
 // Dma buffer allocation isn't currently supported and is for internal use only             //
 // **************************************************************************************** //
@@ -1229,12 +1213,23 @@ typedef struct {
    uint32_t features;
 } hailo_3d_image_shape_t;
+typedef enum
+{
+    HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR,
+    HAILO_PIX_BUFFER_MEMORY_TYPE_DMABUF,
+} hailo_pix_buffer_memory_type_t;
+
 /** image buffer plane */
 typedef struct {
    /** actual data */
    uint32_t bytes_used;
    uint32_t plane_size;
-    void *user_ptr;
+    /* Union holding either a user buffer pointer or a DMA buffer fd */
+    union
+    {
+        void *user_ptr;
+        int fd;
+    };
 } hailo_pix_buffer_plane_t;
 /** image buffer */
@@ -1242,8 +1237,15 @@ typedef struct {
    uint32_t index;
    hailo_pix_buffer_plane_t planes[MAX_NUMBER_OF_PLANES];
    uint32_t number_of_planes;
+    hailo_pix_buffer_memory_type_t memory_type;
 } hailo_pix_buffer_t;
+/** dma buffer - intended for use with Linux's dma-buf subsystem */
+typedef struct {
+    int fd;
+    size_t size;
+} hailo_dma_buffer_t;
+
 typedef struct {
    uint32_t class_group_index;
    char original_name[HAILO_MAX_STREAM_NAME_SIZE];
@@ -1290,8 +1292,11 @@ typedef struct {
    uint32_t number_of_classes;
    /** Maximum amount of bboxes per nms class */
    uint32_t max_bboxes_per_class;
-    /** Maximum mask size */
-    uint32_t max_mask_size;
+    /** Maximum accumulated mask size for all of the detections in a frame.
+     *  Used only with 'HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK' format order.
+     *  The default value is (`input_image_size` * 2).
+     */
+    uint32_t max_accumulated_mask_size;
 } hailo_nms_shape_t;
 #pragma pack(push, 1)
@@ -1354,7 +1359,7 @@ typedef struct {
    /**
     * Status of the async transfer:
     * - ::HAILO_SUCCESS - The transfer is complete.
-     * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+     * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation).
     * - Any other ::hailo_status on unexpected errors.
     */
    hailo_status status;
@@ -1382,7 +1387,7 @@ typedef struct {
    /**
     * Status of the async transfer:
     * - ::HAILO_SUCCESS - The transfer is complete.
-     * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation).
+     * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation).
     * - Any other ::hailo_status on unexpected errors.
     */
    hailo_status status;
@@ -1625,7 +1630,7 @@ typedef struct {
 typedef struct {
    uint8_t network_group_index;
    uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
    uint16_t action_index;
 } hailo_context_switch_breakpoint_reached_message_t;
@@ -1643,7 +1648,7 @@ typedef struct {
    uint32_t exit_status;
    uint8_t network_group_index;
    uint16_t batch_index;
-    uint8_t context_index;
+    uint16_t context_index;
    uint16_t action_index;
 } hailo_context_switch_run_time_error_message_t;
@@ -2811,7 +2816,7 @@ HAILORTAPI hailo_status hailo_network_group_get_output_stream_infos(hailo_config
 /**
 * Shutdown a given network group. Makes sure all ongoing async operations are canceled. All async callbacks
- * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER.
+ * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORT.
 * Any resources attached to the network group may be released after function returns.
 *
 * @param[in]  network_group              NetworkGroup to be shutdown.
@@ -2878,16 +2883,16 @@ HAILORTAPI hailo_status hailo_get_latency_measurement(hailo_configured_network_g
    const char *network_name, hailo_latency_measurement_result_t *result);
 /**
- * Sets the maximum time period that may pass before getting run time from the scheduler,
- * even without reaching the minimum required send requests (e.g. threshold - see hailo_set_scheduler_threshold()),
- * as long as at least one send request has been sent.
- * This time period is measured since the last time the scheduler gave this network group run time.
+ * Sets the maximum time period that may pass before receiving run time from the scheduler.
+ * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+ * send requests required (for a minimum, see hailo_set_scheduler_threshold()).
 *
 * @param[in]  configured_network_group   NetworkGroup for which to set the scheduler timeout.
 * @param[in]  timeout_ms                 Timeout in milliseconds.
 * @param[in]  network_name               Network name for which to set the timeout.
 *                                        If NULL is passed, the timeout will be set for all the networks in the network group.
 * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
 * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
 * @note The default timeout is 0ms.
 * @note Currently, setting the timeout for a specific network is not supported.
@@ -2942,13 +2947,86 @@ HAILORTAPI hailo_status hailo_set_scheduler_priority(hailo_configured_network_gr
 // Free returned buffer via hailo_free_buffer
 HAILORTAPI hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t *allocation_params, void **buffer_out);
 HAILORTAPI hailo_status hailo_free_buffer(void *buffer);
-// Maps buffer to dma. Free mapping by calling hailo_dma_unmap_buffer_from_device and then free buffer as needed
-// If buffer has already been mapped to device, then HAILO_DMA_MAPPING_ALREADY_EXISTS shall be returned
-HAILORTAPI hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction);
-HAILORTAPI hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction);
 // ************************************** NOTE - END ************************************** //
 // Dma buffer allocation isn't currently supported and is for internal use only             //
 // **************************************************************************************** //
+
+/**
+ * Maps the buffer pointed to by @a address for DMA transfers to/from the given @a device, in the specified
+ * @a direction.
+ * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+ * apparent when the buffer is reused multiple times across different async operations.
+ * For low level API (aka ::hailo_input_stream or ::hailo_output_stream), buffers passed to
+ * ::hailo_stream_write_raw_buffer_async and ::hailo_stream_read_raw_buffer_async can be mapped.
+ *
+ * @param[in] device        A ::hailo_device object.
+ * @param[in] address       The address of the buffer to be mapped
+ * @param[in] size          The buffer's size in bytes
+ * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+ *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ *
+ * @note The DMA mapping will be released upon calling ::hailo_device_dma_unmap_buffer with @a address, @a size and
+ *       @a direction, or when the @a device object is destroyed.
+ * @note The buffer pointed to by @a address cannot be released until it is unmapped (via
+ *       ::hailo_device_dma_unmap_buffer or ::hailo_release_device).
+ */
+HAILORTAPI hailo_status hailo_device_dma_map_buffer(hailo_device device, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Un-maps the buffer pointed to by @a address for DMA transfers to/from the given @a device, in the direction
+ * @a direction.
+ *
+ * @param[in] device        A ::hailo_device object.
+ * @param[in] address       The address of the buffer to be un-mapped.
+ * @param[in] size          The buffer's size in bytes.
+ * @param[in] direction     The direction of the mapping.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ */
+HAILORTAPI hailo_status hailo_device_dma_unmap_buffer(hailo_device device, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Maps the buffer pointed to by @a address for DMA transfers to/from the given @a vdevice, in the specified
+ * @a direction.
+ * DMA mapping of buffers in advance may improve the performance of the async API. This improvement will become
+ * apparent when the buffer is reused multiple times across different async operations.
+ * For low level API (aka ::hailo_input_stream or ::hailo_output_stream), buffers passed to
+ * ::hailo_stream_write_raw_buffer_async and ::hailo_stream_read_raw_buffer_async can be mapped.
+ *
+ * @param[in] vdevice       A ::hailo_vdevice object.
+ * @param[in] address       The address of the buffer to be mapped
+ * @param[in] size          The buffer's size in bytes
+ * @param[in] direction     The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D`
+ *                          and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+ *
+ * @note The DMA mapping will be released upon calling ::hailo_vdevice_dma_unmap_buffer with @a address, @a size and
+ *       @a direction, or when the @a vdevice object is destroyed.
+ * @note The buffer pointed to by @a address cannot be released until it is unmapped (via
+ *       ::hailo_vdevice_dma_unmap_buffer or ::hailo_release_vdevice).
+ */
+HAILORTAPI hailo_status hailo_vdevice_dma_map_buffer(hailo_vdevice vdevice, void *address, size_t size,
+    hailo_dma_buffer_direction_t direction);
+
+/**
+ * Un-maps the buffer pointed to by @a address for DMA transfers to/from the given @a vdevice, in the direction
+ * @a direction.
+ *
+ * @param[in] vdevice       A ::hailo_vdevice object.
+ * @param[in] address       The address of the buffer to be un-mapped.
+ * @param[in] size          The buffer's size in bytes.
+ * @param[in] direction     The direction of the mapping.
+ *
+ * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
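For the C API, the same pre-mapping pattern might look as follows; a minimal sketch assuming an already-created `device` and a host buffer reused across several raw async reads:

```cpp
#include "hailo/hailort.h"

hailo_status read_with_pre_mapped_buffer(hailo_device device, void *buffer, size_t size)
{
    // Map once; the mapping is reused by every raw async read on `buffer`.
    hailo_status status = hailo_device_dma_map_buffer(device, buffer, size,
        HAILO_DMA_BUFFER_DIRECTION_D2H);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ... reuse `buffer` across hailo_stream_read_raw_buffer_async() calls ...

    // Unmap with the same address, size and direction that were mapped.
    return hailo_device_dma_unmap_buffer(device, buffer, size, HAILO_DMA_BUFFER_DIRECTION_D2H);
}
```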
+ */ +HAILORTAPI hailo_status hailo_vdevice_dma_unmap_buffer(hailo_vdevice vdevice, void *address, size_t size, + hailo_dma_buffer_direction_t direction); + /** @} */ // end of group_buffer_functions /** @defgroup group_stream_functions Stream functions @@ -3695,6 +3773,7 @@ HAILORTAPI hailo_status hailo_vstream_write_raw_buffer(hailo_input_vstream input * pointers to the planes to where the data to * be sent to the device is stored. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Currently only support memory_type field of buffer to be HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR. */ HAILORTAPI hailo_status hailo_vstream_write_pix_buffer(hailo_input_vstream input_vstream, const hailo_pix_buffer_t *buffer); diff --git a/hailort/libhailort/include/hailo/hailort.hpp b/hailort/libhailort/include/hailo/hailort.hpp index 1c85ac02..cb842657 100644 --- a/hailort/libhailort/include/hailo/hailort.hpp +++ b/hailort/libhailort/include/hailo/hailort.hpp @@ -30,5 +30,6 @@ #include "hailo/network_rate_calculator.hpp" #include "hailo/quantization.hpp" #include "hailo/hailort_defaults.hpp" +#include "hailo/dma_mapped_buffer.hpp" #endif /* _HAILORT_HPP_ */ diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp index 203c9a63..85076f3a 100644 --- a/hailort/libhailort/include/hailo/hailort_common.hpp +++ b/hailort/libhailort/include/hailo/hailort_common.hpp @@ -12,6 +12,7 @@ #include "hailo/hailort.h" #include "hailo/expected.hpp" +#include "hailo/buffer.hpp" #include #include @@ -34,7 +35,7 @@ class HAILORTAPI HailoRTCommon final static_assert(sizeof(hailo_bbox_t) / sizeof(uint16_t) == sizeof(hailo_bbox_float32_t) / sizeof(float32_t), "Mismatch bbox params size"); static const uint32_t BBOX_PARAMS = sizeof(hailo_bbox_t) / sizeof(uint16_t); - static const uint32_t MASK_PARAMS = 1; // mask_size + static const uint32_t DETECTION_WITH_BYTE_MASK_SIZE = sizeof(hailo_detection_with_byte_mask_t); static const uint32_t MAX_DEFUSED_LAYER_COUNT = 9; static const size_t HW_DATA_ALIGNMENT = 8; static const uint32_t MUX_INFO_COUNT = 32; @@ -84,12 +85,17 @@ class HAILORTAPI HailoRTCommon final * @param[in] alignment Returned number should be aligned to this parameter. * @return aligned number */ - template - static constexpr T align_to(T num, T alignment) { + template + static constexpr T align_to(T num, U alignment) { auto remainder = num % alignment; return remainder == 0 ? num : num + (alignment - remainder); } + static void *align_to(void *addr, size_t alignment) + { + return reinterpret_cast(align_to(reinterpret_cast(addr), alignment)); + } + /** * Gets the shape size. * @@ -241,7 +247,7 @@ class HAILORTAPI HailoRTCommon final case HAILO_FORMAT_ORDER_HAILO_YYYYUV: return "YYYYUV"; case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: - return "HAILO NMS WITH METADATA"; + return "HAILO NMS WITH BYTE MASK"; default: return "Nan"; } @@ -280,23 +286,18 @@ class HAILORTAPI HailoRTCommon final static uint32_t get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format); /** - * Gets HAILO_NMS_WITH_BYTE_MASK host shape size in bytes by nms_shape and buffer format. + * Gets `HAILO_NMS_WITH_BYTE_MASK` host frame size in bytes by nms_shape. * * @param[in] nms_shape The NMS shape to get size from. - * @param[in] format A ::hailo_format_t object. - * @return The HAILO_NMS_WITH_BYTE_MASK host shape size. + * @return The HAILO_NMS_WITH_BYTE_MASK host frame size. 
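The reworked frame-size computation (its body is shown just below) reduces to `number_of_classes * max_bboxes_per_class * DETECTION_WITH_BYTE_MASK_SIZE + max_accumulated_mask_size`. A worked example with illustrative values; the 16-byte detection size stands in for `DETECTION_WITH_BYTE_MASK_SIZE` and is an assumption, not the real `sizeof`:

```cpp
#include <cstdint>

constexpr uint32_t number_of_classes         = 80;
constexpr uint32_t max_bboxes_per_class      = 100;
constexpr uint32_t detection_size            = 16;            // assumed sizeof(hailo_detection_with_byte_mask_t)
constexpr uint32_t max_accumulated_mask_size = 640 * 640 * 2; // default: input_image_size * 2

constexpr uint32_t max_detections = number_of_classes * max_bboxes_per_class; // 8,000
constexpr uint32_t frame_size = (max_detections * detection_size)             // 128,000
                                + max_accumulated_mask_size;                  // + 819,200 = 947,200 bytes
```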
*/ - static constexpr uint32_t get_nms_with_byte_mask_host_shape_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format) + static constexpr uint32_t get_nms_with_byte_mask_host_frame_size(const hailo_nms_shape_t &nms_shape) { - // Assuming 1 byte per pixel for the mask - auto bbox_size = BBOX_PARAMS + MASK_PARAMS + nms_shape.max_mask_size; - const uint32_t size_per_class = 1 + (bbox_size * nms_shape.max_bboxes_per_class); - double shape_size = size_per_class * nms_shape.number_of_classes; - if ((shape_size * get_format_data_bytes(format)) < UINT32_MAX) { - return static_cast(shape_size); - } else { - return UINT32_MAX / get_format_data_bytes(format); - } + // TODO: HRT-12035 - Change `max_bboxes_per_class` to `max_boxes` + auto max_detections = nms_shape.number_of_classes * nms_shape.max_bboxes_per_class; + auto max_detections_size = max_detections * DETECTION_WITH_BYTE_MASK_SIZE; + auto frame_size = max_detections_size + nms_shape.max_accumulated_mask_size; + return frame_size; } /** @@ -411,8 +412,16 @@ class HAILORTAPI HailoRTCommon final return ((HAILO_FORMAT_ORDER_HAILO_NMS == order) || (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == order)); } + // TODO HRT-10073: change to supported features list + static bool is_hailo1x_device_type(const hailo_device_architecture_t dev_arch) + { + // Compare with HAILO1X device archs + return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); + } + static Expected to_device_id(const std::string &device_id); static Expected> to_device_ids_vector(const std::vector &device_ids_str); + static Expected as_hailo_pix_buffer(MemoryView &memory_view, hailo_format_order_t order); }; #ifndef HAILO_EMULATOR diff --git a/hailort/libhailort/include/hailo/hef.hpp b/hailort/libhailort/include/hailo/hef.hpp index 3ddd8b2b..b44e6289 100644 --- a/hailort/libhailort/include/hailo/hef.hpp +++ b/hailort/libhailort/include/hailo/hef.hpp @@ -22,6 +22,9 @@ namespace hailort { +#define DEFAULT_NMS_NO_BURST_SIZE (1) +#define DEFAULT_ACTUAL_BATCH_SIZE (1) + /*! Hailo configure parameters per network_group. Analogical to hailo_configure_network_group_params_t */ struct ConfigureNetworkParams { @@ -466,6 +469,7 @@ class HAILORTAPI Hef final friend class ConfiguredNetworkGroupBase; friend class CoreOp; friend class VDeviceBase; + friend class InferModel; #ifdef HAILO_SUPPORT_MULTI_PROCESS friend class HailoRtRpcClient; diff --git a/hailort/libhailort/include/hailo/infer_model.hpp b/hailort/libhailort/include/hailo/infer_model.hpp index 258cc895..c92995e8 100644 --- a/hailort/libhailort/include/hailo/infer_model.hpp +++ b/hailort/libhailort/include/hailo/infer_model.hpp @@ -23,6 +23,8 @@ namespace hailort class ConfiguredInferModelImpl; class AsyncInferRunnerImpl; + +/*! Asynchronous inference job representation is used to manage and control an inference job that is running asynchronously. */ class HAILORTAPI AsyncInferJob { public: @@ -34,7 +36,19 @@ class HAILORTAPI AsyncInferJob AsyncInferJob(AsyncInferJob &&other); AsyncInferJob &operator=(AsyncInferJob &&other); + /** + * Waits for the asynchronous inference job to finish. + * + * @param[in] timeout The maximum time to wait. + * + * @return A ::hailo_status indicating the status of the operation. + * If the job finishes successfully within the timeout, ::HAILO_SUCCESS is returned. Otherwise, returns a ::hailo_status error + **/ hailo_status wait(std::chrono::milliseconds timeout); + + /** + * Detaches the job. 
Without detaching, the job's destructor will block until the job finishes. + **/ void detach(); private: @@ -47,35 +61,115 @@ class HAILORTAPI AsyncInferJob }; struct AsyncInferCompletionInfo; + +static const auto ASYNC_INFER_EMPTY_CALLBACK = [](const AsyncInferCompletionInfo&) {}; + +/*! Configured infer_model that can be used to perform an asynchronous inference */ class HAILORTAPI ConfiguredInferModel { public: ConfiguredInferModel() = default; + /** Represents an asynchronous infer request - holds the input and output buffers of the request */ class HAILORTAPI Bindings { public: Bindings() = default; + /** Holds the input and output buffers of the Bindings infer request */ class HAILORTAPI InferStream { public: + /** + * Sets the edge's buffer to a new one, of type MemoryView. + * + * @param[in] view The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ hailo_status set_buffer(MemoryView view); + + /** + * @return Upon success, returns Expected of the MemoryView buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not MemoryView, will return ::HAILO_INVALID_OPERATION. + */ Expected get_buffer(); + + /** + * Sets the edge's buffer to a new one, of type hailo_pix_buffer_t. + * + * @param[in] pix_buffer The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Supported only for inputs. + * @note Currently only support memory_type field of buffer to be HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR. + */ hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); + + /** + * @return Upon success, returns Expected of the ::hailo_pix_buffer_t buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not ::hailo_pix_buffer_t, will return ::HAILO_INVALID_OPERATION. + */ Expected get_pix_buffer(); + /** + * Sets the edge's buffer from a DMA buffer. + * + * @param[in] dma_buffer The new buffer to be set. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Supported on Linux only. + */ + hailo_status set_dma_buffer(hailo_dma_buffer_t dma_buffer); + + /** + * @return Upon success, returns Expected of the ::hailo_dma_buffer_t buffer of the edge. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note If buffer type is not ::hailo_dma_buffer_t, will return ::HAILO_INVALID_OPERATION. + * @note Supported on Linux only. + */ + Expected get_dma_buffer(); + private: friend class ConfiguredInferModelImpl; + friend class AsyncInferRunnerImpl; class Impl; InferStream(std::shared_ptr pimpl); std::shared_ptr m_pimpl; }; + /** + * Returns the single input's InferStream object. + * + * @return Upon success, returns Expected of the single input's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If Bindings has multiple inputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use input(const std::string &name) instead. + */ Expected input(); + + /** + * Returns the single output's InferStream object. + * + * @return Upon success, returns Expected of the single output's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If Bindings has multiple outputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use output(const std::string &name) instead. + */ Expected output(); + + /** + * Gets an input's InferStream object. 
+ * + * @param[in] name The name of the input edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected input(const std::string &name); + + /** + * Gets an output's InferStream object. + * + * @param[in] name The name of the output edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected output(const std::string &name); private: @@ -88,19 +182,121 @@ class HAILORTAPI ConfiguredInferModel std::unordered_map m_outputs; }; + /** + * Creates a Bindings object. + * + * @return Upon success, returns Expected of Bindings. Otherwise, returns Unexpected of ::hailo_status error. + */ Expected create_bindings(); + + /** + * Waits until the model is ready to launch a new asynchronous inference operation. + * The readiness of the model is determined by the ability to push buffers to the asynchronous inference pipeline. + * + * @param[in] timeout Amount of time to wait until the model is ready in milliseconds. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise: + * - If @a timeout has passed and the model is not ready, returns ::HAILO_TIMEOUT. + * - In any other error case, returns ::hailo_status error. + */ hailo_status wait_for_async_ready(std::chrono::milliseconds timeout); + + /** + * Activates hailo device inner-resources for context_switch inference. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns ::hailo_status error. + * @note Calling this function is invalid in case scheduler is enabled. + */ hailo_status activate(); + + /** + * Deactivates hailo device inner-resources for context_switch inference. + * @note Calling this function is invalid in case scheduler is enabled. + */ void deactivate(); + + /** + * Launches a synchronous inference operation with the provided bindings. + * + * @param[in] bindings The bindings for the inputs and outputs of the model. + * @param[in] timeout The maximum amount of time to wait for the inference operation to complete. + * + * @return Upon success, returns ::HAILO_SUCCESS. + * Otherwise, returns Unexpected of ::hailo_status error. + */ hailo_status run(Bindings bindings, std::chrono::milliseconds timeout); + + /** + * Launches an asynchronous inference operation with the provided bindings. + * The completion of the operation is notified through the provided callback function. + * + * @param[in] bindings The bindings for the inputs and outputs of the model. + * @param[in] callback The function to be called upon completion of the asynchronous inference operation. + * + * @return Upon success, returns an instance of Expected representing the launched job. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note @a callback should execute as quickly as possible. + */ Expected run_async(Bindings bindings, - std::function callback = [] (const AsyncInferCompletionInfo &) {}); - Expected get_hw_latency_measurement(const std::string &network_name = ""); + std::function callback = ASYNC_INFER_EMPTY_CALLBACK); + + /** + * @return Upon success, returns Expected of LatencyMeasurementResult object containing the output latency result. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected get_hw_latency_measurement(); + + /** + * Sets the maximum time period that may pass before receiving run time from the scheduler. 
+     * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+     * send requests required (for a minimum, see set_scheduler_threshold()).
+     *
+     * @param[in] timeout           Timeout in milliseconds.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default timeout is 0ms.
+     */
    hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout);
+
+    /**
+     * Sets the minimum number of send requests required before the network is considered ready to get run time from the scheduler.
+     *
+     * @param[in] threshold         Threshold in number of frames.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default threshold is 1.
+     * @note If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see
+     *       set_scheduler_timeout()), the scheduler will consider the network ready regardless.
+     */
    hailo_status set_scheduler_threshold(uint32_t threshold);
+
+    /**
+     * Sets the priority of the network.
+     * When the scheduler chooses the next network to run, networks with a higher priority are preferred.
+     * A bigger number represents a higher priority.
+     *
+     * @param[in] priority          Priority as a number between HAILO_SCHEDULER_PRIORITY_MIN and HAILO_SCHEDULER_PRIORITY_MAX.
+     *
+     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
+     * @note The default priority is HAILO_SCHEDULER_PRIORITY_NORMAL.
+     */
    hailo_status set_scheduler_priority(uint8_t priority);
+
+    /**
+     * @return Upon success, returns Expected of the number of inferences that can be queued simultaneously for execution.
+     *         Otherwise, returns Unexpected of ::hailo_status error.
+     */
    Expected get_async_queue_size();
+    /**
+     * Shuts the inference down. After calling this method, the model is no longer usable.
+     */
+    void shutdown();
+
 private:
    friend class InferModel;
@@ -109,39 +305,134 @@
    std::shared_ptr m_pimpl;
 };
+/**
+ * Context passed to the callback function after the asynchronous inference operation has completed or failed.
+ */
 struct HAILORTAPI AsyncInferCompletionInfo
 {
-    AsyncInferCompletionInfo(ConfiguredInferModel::Bindings _bindings, hailo_status _status) : bindings(_bindings), status(_status)
+    /**
+     * Constructor for AsyncInferCompletionInfo.
+     *
+     * @param[in] _status The status of the inference operation.
+     */
+    AsyncInferCompletionInfo(hailo_status _status) : status(_status)
    {
    }
-    ConfiguredInferModel::Bindings bindings;
+    /**
+     * Status of the asynchronous inference operation.
+     * - ::HAILO_SUCCESS - When the inference operation completes successfully.
+     * - Any other ::hailo_status on unexpected errors.
+     */
    hailo_status status;
 };
+/**
+ * Contains all of the necessary information for configuring the network for inference.
+ * This class is used to set up the model for inference and includes methods for setting and getting the model's parameters.
+ * By calling the configure function, the user can create a ConfiguredInferModel object, which is used to run inference.
+ */
 class HAILORTAPI InferModel final
 {
 public:
    ~InferModel() = default;
+    /**
+     * Represents the parameters of a stream.
+     * By default, the stream's parameters are set to the default values of the model.
+     * The user can change the stream's parameters by calling the set_ functions.
+     */
    class HAILORTAPI InferStream
    {
    public:
-        // TODO: explain that the getters return what the user defined with set_ functions
+        /**
+         * @return The name of the stream.
+         */
        const std::string name() const;
+
+        /**
+         * @return The shape of the image that the stream will use for inference.
+         */
        hailo_3d_image_shape_t shape() const;
+
+        /**
+         * @return The format that the stream will use for inference.
+         */
        hailo_format_t format() const;
+
+        /**
+         * @return The size in bytes of a frame that the stream will use for inference.
+         */
        size_t get_frame_size() const;
+
+        /**
+         * @return Upon success, an Expected of hailo_nms_shape_t, the NMS shape for the stream.
+         *         Otherwise, returns Unexpected of ::hailo_status error.
+         * @note In case NMS is disabled, returns an Unexpected of ::HAILO_INVALID_OPERATION.
+         */
        Expected get_nms_shape() const;
-
+
+        /**
+         * Sets the format type of the stream.
+         * This method is used to specify the format type that the stream will use for inference.
+         *
+         * @param[in] type The format type to be set for the stream. This should be a value of the hailo_format_type_t enum.
+         */
        void set_format_type(hailo_format_type_t type);
+
+        /**
+         * Sets the format order of the stream.
+         * This method is used to specify the format order that the stream will use for inference.
+         *
+         * @param[in] order The format order to be set for the stream. This should be a value of the hailo_format_order_t enum.
+         */
        void set_format_order(hailo_format_order_t order);
+
+        /**
+         * Retrieves the quantization information for all layers in the model.
+         * @return A vector of hailo_quant_info_t structures, each representing the quantization information for a layer in the model.
+         */
        std::vector get_quant_infos() const;
+
+        /**
+         * Checks if Non-Maximum Suppression (NMS) is enabled for the model.
+         *
+         * @return True if NMS is enabled, false otherwise.
+         */
        bool is_nms() const;
+
+        /**
+         * Set NMS score threshold, used for filtering out candidates. Any box with score below the threshold
+         * is suppressed.
+         *
+         * @param[in] threshold        NMS score threshold to set.
+         */
+        void set_nms_score_threshold(float32_t threshold);
+
    private:
        friend class InferModel;
        class Impl;
        InferStream(std::shared_ptr pimpl);
        std::shared_ptr m_pimpl;
    };
+    /**
+     * @return A constant reference to the Hef object associated with this InferModel.
+     */
    const Hef &hef() const;
+
+    /**
+     * Sets the batch size of the InferModel.
+     *
+     * @param[in] batch_size      The new batch size to be set.
+     */
    void set_batch_size(uint16_t batch_size);
+
+    /**
+     * Sets the power mode of the InferModel.
+     * See ::hailo_power_mode_t for more information.
+     *
+     * @param[in] power_mode      The new power mode to be set.
+     */
    void set_power_mode(hailo_power_mode_t power_mode);
+
+    /**
+     * Sets the latency measurement flags of the InferModel.
+     * See ::hailo_latency_measurement_flags_t for more information.
+     *
+     * @param[in] latency         The new latency measurement flags to be set.
+     */
    void set_hw_latency_measurement_flags(hailo_latency_measurement_flags_t latency);
-    Expected configure(const std::string &network_name = "");
+    /**
+     * Configures the InferModel object. Also checks the validity of the configuration's formats.
+     *
+     * @return Upon success, returns Expected of ConfiguredInferModel, which can be used to perform an asynchronous inference.
+     *         Otherwise, returns Unexpected of ::hailo_status error.
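Putting the pieces above together, here is a minimal end-to-end sketch of the configure-bind-run flow. The path "model.hef", the single-input/single-output assumption, and the 1-second timeout are illustrative only:

```cpp
#include "hailo/hailort.hpp"
#include <chrono>

using namespace hailort;
using namespace std::chrono_literals;

hailo_status run_one_async_inference(VDevice &vdevice,
    void *input_data, size_t input_size, void *output_data, size_t output_size)
{
    auto infer_model_exp = vdevice.create_infer_model("model.hef");
    if (!infer_model_exp.has_value()) {
        return infer_model_exp.status();
    }
    auto infer_model = infer_model_exp.release();

    auto configured_exp = infer_model->configure();
    if (!configured_exp.has_value()) {
        return configured_exp.status();
    }
    auto configured = configured_exp.release();

    auto bindings_exp = configured.create_bindings();
    if (!bindings_exp.has_value()) {
        return bindings_exp.status();
    }
    auto bindings = bindings_exp.release();

    // Bind the user buffers; for multi-edge models use input(name)/output(name) instead.
    auto status = bindings.input().value().set_buffer(MemoryView(input_data, input_size));
    if (HAILO_SUCCESS != status) {
        return status;
    }
    status = bindings.output().value().set_buffer(MemoryView(output_data, output_size));
    if (HAILO_SUCCESS != status) {
        return status;
    }

    auto job_exp = configured.run_async(bindings);
    if (!job_exp.has_value()) {
        return job_exp.status();
    }
    auto job = job_exp.release();

    return job.wait(1s); // block until this inference completes (or times out)
}
```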
+ * @note InferModel can be configured once. + */ + Expected configure(); + + /** + * Returns the single input's InferStream object. + * + * @return Upon success, returns Expected of the single input's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If InferModel has multiple inputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use input(const std::string &name) instead. + */ Expected input(); + + /** + * Returns the single output's InferStream object. + * + * @return Upon success, returns Expected of the single output's InferStream object. Otherwise, returns Unexpected of ::hailo_status error. + * @note If InferModel has multiple outputs, will return ::HAILO_INVALID_OPERATION. + * In that case - use output(const std::string &name) instead. + */ Expected output(); + + /** + * Gets an input's InferStream object. + * + * @param[in] name The name of the input edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected input(const std::string &name); + + /** + * Gets an output's InferStream object. + * + * @param[in] name The name of the output edge. + * @return Upon success, returns Expected of the relevant InferStream object. Otherwise, returns a ::hailo_status error. + */ Expected output(const std::string &name); + + /** + * @return A constant reference to the vector of input InferStream objects, each representing an input edge. + */ const std::vector &inputs() const; + + /** + * @return A constant reference to the vector of output InferStream objects, each representing an output edge. + */ const std::vector &outputs() const; + + /** + * @return A constant reference to a vector of strings, each representing the name of an input stream. + */ const std::vector &get_input_names() const; + + /** + * @return A constant reference to a vector of strings, each representing the name of an output stream. + */ const std::vector &get_output_names() const; - + InferModel(InferModel &&); Expected configure_for_ut(std::shared_ptr async_infer_runner, - const std::vector &input_names, const std::vector &output_names); + const std::vector &input_names, const std::vector &output_names, + std::shared_ptr net_group = nullptr); private: friend class VDevice; diff --git a/hailort/libhailort/include/hailo/inference_pipeline.hpp b/hailort/libhailort/include/hailo/inference_pipeline.hpp index a6811b26..e9b5be10 100644 --- a/hailort/libhailort/include/hailo/inference_pipeline.hpp +++ b/hailort/libhailort/include/hailo/inference_pipeline.hpp @@ -106,10 +106,23 @@ class HAILORTAPI InferVStreams final * * @param[in] max_proposals_per_class NMS max proposals per class to set. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note This function will fail in cases where there is no output with NMS operations on the CPU. + * @note This function must be called before starting inference! + * This function will fail in cases where there is no output with NMS operations on the CPU. */ hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + /** + * Set maximum accumulated mask size for all the detections in a frame. + * + * Note: Used in order to change the output buffer frame size, + * in cases where the output buffer is too small for all the segmentation detections. + * + * @param[in] max_accumulated_mask_size NMS max accumulated mask size. + * @note This function must be called before starting inference! 
+     * This function will fail in cases where the output vstream has no NMS operations on the CPU.
+     */
+    hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size);
+
    InferVStreams(const InferVStreams &other) = delete;
    InferVStreams &operator=(const InferVStreams &other) = delete;
    InferVStreams &operator=(InferVStreams &&other) = delete;
diff --git a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp
index 9b765476..6d5e1708 100644
--- a/hailort/libhailort/include/hailo/network_group.hpp
+++ b/hailort/libhailort/include/hailo/network_group.hpp
@@ -66,7 +66,7 @@ struct HwInferResults {
 };
 /*@}*/
-using src_context_t = uint8_t;
+using src_context_t = uint16_t;
 using src_stream_index_t = uint8_t;
 using IntermediateBufferKey = std::pair;
@@ -239,7 +239,7 @@ class HAILORTAPI ConfiguredNetworkGroup
    /**
     * Shutdown the network group. Makes sure all ongoing async operations are canceled. All async callbacks
-     * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORTED_BY_USER.
+     * of transfers that have not been completed will be called with status ::HAILO_STREAM_ABORT.
     * Any resources attached to the network group may be released after function returns.
     *
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
@@ -348,15 +348,15 @@ class HAILORTAPI ConfiguredNetworkGroup
    virtual bool is_scheduled() const = 0;
    /**
-     * Sets the maximum time period that may pass before getting run time from the scheduler,
-     * even without reaching the minimum required send requests (e.g. threshold - see set_scheduler_threshold()),
-     * as long as at least one send request has been sent.
-     * This time period is measured since the last time the scheduler gave this network group run time.
+     * Sets the maximum time period that may pass before receiving run time from the scheduler.
+     * The timeout applies as long as at least one send request has been sent; there is no minimum number of
+     * send requests required (for a minimum, see set_scheduler_threshold()).
     *
     * @param[in]  timeout              Timeout in milliseconds.
     * @param[in]  network_name         Network name for which to set the timeout.
     *                                  If not passed, the timeout will be set for all the networks in the network group.
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
+     * @note The new time period is measured from the last time the scheduler allocated run time to this network group.
     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
     * @note The default timeout is 0ms.
     * @note Currently, setting the timeout for a specific network is not supported.
@@ -365,8 +365,6 @@ class HAILORTAPI ConfiguredNetworkGroup
    /**
     * Sets the minimum number of send requests required before the network is considered ready to get run time from the scheduler.
-     * If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see hailo_set_scheduler_timeout()),
-     * the scheduler will consider the network ready regardless.
     *
     * @param[in]  threshold            Threshold in number of frames.
     * @param[in]  network_name         Network name for which to set the threshold.
     * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error.
     * @note Using this function is only allowed when scheduling_algorithm is not ::HAILO_SCHEDULING_ALGORITHM_NONE.
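As a usage sketch of the scheduler controls documented here (assuming a network group configured with the scheduler enabled; the 4-frame threshold and 100ms timeout are illustrative):

```cpp
#include "hailo/hailort.hpp"
#include <chrono>

using namespace hailort;
using namespace std::chrono_literals;

hailo_status tune_scheduler(ConfiguredNetworkGroup &network_group)
{
    // Consider the network group ready once 4 send requests are pending...
    auto status = network_group.set_scheduler_threshold(4);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ...but never hold a pending request for longer than 100ms (the timeout wins).
    return network_group.set_scheduler_timeout(100ms);
}
```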
* @note The default threshold is 1. + * @note If at least one send request has been sent, but the threshold is not reached within a set time period (e.g. timeout - see + * hailo_set_scheduler_timeout()), the scheduler will consider the network ready regardless. * @note Currently, setting the threshold for a specific network is not supported. */ virtual hailo_status set_scheduler_threshold(uint32_t threshold, const std::string &network_name="") = 0; @@ -429,14 +429,14 @@ class HAILORTAPI ConfiguredNetworkGroup const std::function &infer_request_done_cb) = 0; virtual Expected> get_ops_metadata() = 0; virtual Expected> get_layer_info(const std::string &stream_name) = 0; - hailo_status wait_for_callbacks_finish(); - hailo_status wait_for_callbacks_to_maintain_below_threshold(size_t threshold); + hailo_status wait_for_ongoing_callbacks_count_under(size_t threshold); void decrease_ongoing_callbacks(); void increase_ongoing_callbacks(); virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) = 0; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) = 0; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) = 0; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) = 0; protected: ConfiguredNetworkGroup(); @@ -446,7 +446,7 @@ class HAILORTAPI ConfiguredNetworkGroup std::condition_variable m_cv; private: friend class ActivatedNetworkGroup; - friend class PipelineBuilder; + friend class AsyncAsyncPipelineBuilder; }; using ConfiguredNetworkGroupVector = std::vector>; diff --git a/hailort/libhailort/include/hailo/stream.hpp b/hailort/libhailort/include/hailo/stream.hpp index 3f21bdfd..5423ac17 100644 --- a/hailort/libhailort/include/hailo/stream.hpp +++ b/hailort/libhailort/include/hailo/stream.hpp @@ -43,7 +43,7 @@ class HAILORTAPI InputStream /** * Status of the async transfer. * - ::HAILO_SUCCESS - When transfer is complete successfully. - * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation). * - Any other ::hailo_status on unexpected errors. */ hailo_status status; @@ -223,9 +223,6 @@ class HAILORTAPI InputStream */ virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; - // The usage of BufferPtr for async API isn't currently supported and is for internal use only. - virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; - /** * @returns A ::hailo_stream_info_t object containing the stream's info. */ @@ -290,7 +287,7 @@ class HAILORTAPI OutputStream /** * Status of the async transfer. * - ::HAILO_SUCCESS - When transfer is complete successfully. - * - ::HAILO_STREAM_ABORTED_BY_USER - The transfer was canceled (can happen after network deactivation). + * - ::HAILO_STREAM_ABORT - The transfer was canceled (can happen after network deactivation). * - Any other ::hailo_status on unexpected errors. */ hailo_status status; @@ -505,9 +502,6 @@ class HAILORTAPI OutputStream */ virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) = 0; - // The usage of BufferPtr for async API isn't currently supported and is for internal use only. 
- virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) = 0; - // get_network_group_activated_event is same as this function virtual EventPtr &get_core_op_activated_event() = 0; protected: diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp index fd8f40a2..726c42b8 100644 --- a/hailort/libhailort/include/hailo/vdevice.hpp +++ b/hailort/libhailort/include/hailo/vdevice.hpp @@ -55,7 +55,7 @@ class HAILORTAPI VDevice static Expected> create(const std::vector &device_ids); /** - * Configure the vdevice from an hef. + * Configures the vdevice from an hef. * * @param[in] hef A reference to an Hef object to configure the vdevice by. * @param[in] configure_params A map of configured network group name and parameters. @@ -64,8 +64,17 @@ class HAILORTAPI VDevice */ virtual Expected configure(Hef &hef, const NetworkGroupsParamsMap &configure_params={}) = 0; - - virtual Expected> create_infer_model(const std::string &hef_path); + + /** + * Creates the infer model from an hef + * + * @param[in] hef_path A string of an hef file. + * @param[in] network_name A string of the network name (optional). + * @return Upon success, returns Expected of a shared pointer of infer model. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected> create_infer_model(const std::string &hef_path, + const std::string &network_name = ""); /** * Gets the underlying physical devices. @@ -111,39 +120,43 @@ class HAILORTAPI VDevice */ Expected create_configure_params(Hef &hef, const std::string &network_group_name) const; - // TODO: Also link to async infer - ConfiguredInferModel, Bindings etc. Just like we did for - // InputStream::write_async and OutputStream::read_async (HRT-11039) /** * Maps the buffer pointed to by @a address for DMA transfers to/from this vdevice, in the specified * @a data_direction. - * DMA mapping of buffers in advance may improve the performance of `InputStream::write_async()` or - * `OutputStream::read_async()`. This improvement will be realized if the buffer is reused multiple times - * across different async operations. - * - For buffers that will be written to the vdevice via `InputStream::write_async()`, use `HAILO_H2D_STREAM` - * for the @a direction parameter. - * - For buffers that will be read from the vdevice via `OutputStream::read_async()`, use `HAILO_D2H_STREAM` - * for the @a direction parameter. + * DMA mapping of buffers in advance may improve the performance of async API. This improvement will become + * apparent when the buffer is reused multiple times across different async operations. + * + * For high level API (aka InferModel), buffers bound using ConfiguredInferModel::Bindings::InferStream::set_buffer + * can be mapped. + * + * For low level API (aka InputStream/OutputStream), buffers passed to InputStream::write_async and + * OutputStream::read_async can be mapped. + * + * @param[in] address The address of the buffer to be mapped. + * @param[in] size The buffer's size in bytes. + * @param[in] direction The direction of the mapping. For input streams, use `HAILO_DMA_BUFFER_DIRECTION_H2D` + * and for output streams, use `HAILO_DMA_BUFFER_DIRECTION_D2H`. * - * @param[in] address The address of the buffer to be mapped - * @param[in] size The buffer's size in bytes - * @param[in] direction The direction of the mapping * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
- * @note The DMA mapping will be freed upon calling dma_unmap() with @a address and @a data_direction, or when the - * @a VDevice object is destroyed. - * @note The buffer pointed to by @a address cannot be freed until it is unmapped (via dma_unmap() or @a VDevice + * + * @note The DMA mapping will be released upon calling dma_unmap() with @a address, @a size and @a data_direction, or + * when the @a VDevice object is destroyed. + * @note The buffer pointed to by @a address cannot be released until it is unmapped (via dma_unmap() or @a VDevice * destruction). */ - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction); + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) = 0; /** * Un-maps the buffer pointed to by @a address for DMA transfers to/from this vdevice, in the direction * @a direction. * - @param[in] address The address of the buffer to be un-mapped - * @param[in] direction The direction of the mapping + * @param[in] address The address of the buffer to be un-mapped. + * @param[in] size The buffer's size in bytes. + * @param[in] direction The direction of the mapping. + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. */ - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction); + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) = 0; virtual hailo_status before_fork(); virtual hailo_status after_fork_in_parent(); diff --git a/hailort/libhailort/include/hailo/vstream.hpp b/hailort/libhailort/include/hailo/vstream.hpp index 76d17f71..fd3cd30f 100644 --- a/hailort/libhailort/include/hailo/vstream.hpp +++ b/hailort/libhailort/include/hailo/vstream.hpp @@ -28,7 +28,7 @@ class HAILORTAPI InputVStream static Expected create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStream(InputVStream &&other) noexcept = default; InputVStream &operator=(InputVStream &&other) noexcept = default; @@ -50,6 +50,7 @@ class HAILORTAPI InputVStream * @param[in] buffer The buffer containing pointers to the planes where the data to * be sent to the device is stored. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note Currently, only buffers whose memory_type field is HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR are supported.
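The dma_map()/dma_unmap() change above (hailo_dma_buffer_direction_t instead of hailo_stream_direction_t, and @a size now required on unmap) pairs as in the following sketch, assuming a valid `vdevice` and a reusable input buffer of `frame_size` bytes:

    std::vector<uint8_t> buffer(frame_size);
    auto status = vdevice->dma_map(buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    // ... reuse the mapped buffer across many async transfers ...
    status = vdevice->dma_unmap(buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);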
*/ hailo_status write(const hailo_pix_buffer_t &buffer); @@ -202,7 +203,7 @@ class HAILORTAPI OutputVStream const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); + EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStream(OutputVStream &&other) noexcept = default; OutputVStream &operator=(OutputVStream &&other) noexcept = default; virtual ~OutputVStream() = default; @@ -302,8 +303,8 @@ class HAILORTAPI OutputVStream /** * Gets a reference to a map between pipeline element names to their respective queue size accumulators. - * These accumulators measure the number of free buffers in the queue, right before a buffer is removed - * from the queue to be used. + * These accumulators measure the number of buffers in the queue, waiting to be processed downstream. + * The measurements take place right before we try to enqueue the next buffer. * * @return A const reference to a map between pipeline element names to their respective queue size accumulators. * @note Queue size accumulators are created for pipeline elements, if the vstream is created with the flag @@ -352,10 +353,23 @@ class HAILORTAPI OutputVStream * * @param[in] max_proposals_per_class NMS max proposals per class to set. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note This function will fail in cases where the output vstream has no NMS operations on the CPU. + * @note This function must be called before starting inference! + * This function will fail in cases where the output vstream has no NMS operations on the CPU. */ hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + /** + * Sets the maximum accumulated mask size for all the detections in a frame. + * + * Note: Used to change the output buffer frame size + * in cases where the output buffer is too small to hold all the segmentation detections. + * + * @param[in] max_accumulated_mask_size NMS max accumulated mask size. + * @note This function must be called before starting inference! + * This function will fail in cases where the output vstream has no NMS operations on the CPU.
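Both NMS setters must run before inference starts, as the notes above stress. A sketch with hypothetical values, assuming `output_vstream` performs NMS on the CPU:

    auto status = output_vstream.set_nms_max_proposals_per_class(50);
    if (HAILO_SUCCESS != status) { /* no CPU NMS op, or inference already started */ }
    status = output_vstream.set_nms_max_accumulated_mask_size(1024 * 1024);
    if (HAILO_SUCCESS != status) { /* same failure modes as above */ }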
+ */ + hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size); + bool is_aborted(); diff --git a/hailort/libhailort/src/CMakeLists.txt b/hailort/libhailort/src/CMakeLists.txt index 51b0c90b..b9921de5 100644 --- a/hailort/libhailort/src/CMakeLists.txt +++ b/hailort/libhailort/src/CMakeLists.txt @@ -7,6 +7,7 @@ include(CMakePackageConfigHelpers) include(${CMAKE_CURRENT_SOURCE_DIR}/../../cmake/common_compiler_options.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/readerwriterqueue.cmake) +include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/eigen.cmake) FUNCTION(relative_to_absolute_paths output) SET(listVar "") @@ -53,8 +54,10 @@ relative_to_absolute_paths(C_OS_SOURCES ${C_OS_SOURCES}) relative_to_absolute_paths(COMMON_C_SOURCES ${COMMON_C_SOURCES}) relative_to_absolute_paths(HAILO_OS_DIR ${HAILO_OS_DIR}) relative_to_absolute_paths(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR}) +relative_to_absolute_paths(HAILO_DRIVER_SRC_FILES ${HAILO_DRIVER_SRC_FILES}) set(HAILO_OS_DIR ${HAILO_OS_DIR} CACHE INTERNAL "Absolute path of os-dir") set(HAILO_FULL_OS_DIR ${HAILO_FULL_OS_DIR} CACHE INTERNAL "Absolute Full path of os-dir") +set(HAILO_DRIVER_SRC_FILES ${HAILO_DRIVER_SRC_FILES} CACHE INTERNAL "Absolute Full path of driver src files") set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} CACHE INTERNAL "Absolute paths of hailort's cpp source files") set(COMMON_C_SOURCES ${COMMON_C_SOURCES} CACHE INTERNAL "Absolute paths of common source files") set(HAILORT_SRCS_ABS ${HAILORT_CPP_SOURCES} ${HAILORT_COMMON_CPP_SOURCES} ${COMMON_C_SOURCES} CACHE INTERNAL "All absolute paths of hailort's source files") @@ -87,6 +90,7 @@ target_link_libraries(libhailort PRIVATE profiler_proto) target_link_libraries(libhailort PRIVATE scheduler_mon_proto) target_link_libraries(libhailort PRIVATE spdlog::spdlog) target_link_libraries(libhailort PRIVATE readerwriterqueue) +target_link_libraries(libhailort PRIVATE Eigen3::Eigen) if(HAILO_BUILD_SERVICE) target_link_libraries(libhailort PRIVATE grpc++_unsecure) target_link_libraries(libhailort PRIVATE hailort_rpc_grpc_proto) @@ -101,7 +105,6 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/platform.h ${HAILORT_INC_DIR}/hailo/hailort.hpp - ${HAILORT_INC_DIR}/hailo/buffer_storage.hpp ${HAILORT_INC_DIR}/hailo/buffer.hpp ${HAILORT_INC_DIR}/hailo/device.hpp ${HAILORT_INC_DIR}/hailo/event.hpp @@ -119,6 +122,7 @@ set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/vdevice.hpp ${HAILORT_INC_DIR}/hailo/quantization.hpp ${HAILORT_INC_DIR}/hailo/hailort_defaults.hpp + ${HAILORT_INC_DIR}/hailo/dma_mapped_buffer.hpp ) set_target_properties(libhailort PROPERTIES diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt index 79c32184..b2d401d6 100644 --- a/hailort/libhailort/src/core_op/CMakeLists.txt +++ b/hailort/libhailort/src/core_op/CMakeLists.txt @@ -8,8 +8,11 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/config_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/context_switch_buffer_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/periph_calculator.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/internal_buffer_planner.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp index 83fcc4f5..d195d041 100644 --- a/hailort/libhailort/src/core_op/core_op.cpp +++ b/hailort/libhailort/src/core_op/core_op.cpp @@ -16,7 +16,6 @@ #include "core_op/core_op.hpp" #include "core_op/resource_manager/resource_manager.hpp" -#include "hef/hef_internal.hpp" #include "eth/eth_stream.hpp" #include "vdma/vdma_stream.hpp" #include "mipi/mipi_stream.hpp" @@ -139,11 +138,12 @@ hailo_status CoreOp::activate(uint16_t dynamic_batch_size) } m_active_core_op_holder.clear(); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); + // TODO: HRT-13019 - Unite with the calculation in vdma_config_core_op.cpp const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( std::chrono::steady_clock::now() - start_time).count(); @@ -185,6 +185,7 @@ hailo_status CoreOp::deactivate() LOGGER__ERROR("Failed deactivating core-op (status {})", deactivate_status); } + // TODO: HRT-13019 - Unite with the calculation in vdma_config_core_op.cpp const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( std::chrono::steady_clock::now() - start_time).count(); LOGGER__INFO("Deactivating took {} ms", elapsed_time_ms); @@ -289,7 +290,7 @@ hailo_status CoreOp::activate_low_level_streams() { for (auto &name_pair : m_input_streams) { auto status = name_pair.second->activate_stream(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Stream {} activation failed because it was aborted by user", name_pair.first); return status; } @@ -297,7 +298,7 @@ } for (auto &name_pair : m_output_streams) { auto status = name_pair.second->activate_stream(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Stream {} activation failed because it was aborted by user", name_pair.first); return status; } @@ -532,8 +533,8 @@ hailo_status CoreOp::infer_async_impl(std::unordered_mapsecond.get_total_transfer_size(), input.second->get_frame_size()); - auto status = input.second->write_async(std::move(transfer->second)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = input.second->write_async(TransferRequest{transfer->second}); + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -548,8 +549,8 @@ hailo_status CoreOp::infer_async_impl(std::unordered_mapsecond.get_total_transfer_size(), output.second->get_frame_size()); - auto status = output.second->read_async(std::move(transfer->second)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + auto status = output.second->read_async(TransferRequest{transfer->second}); + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -563,8 +564,13 @@ TransferDoneCallback CoreOp::wrap_user_callback(TransferDoneCallback &&original_ std::shared_ptr state, TransferDoneCallback infer_callback) { - return [original_callback, state, infer_callback](hailo_status status) { - original_callback(status); + return [original_callback, state, infer_callback](hailo_status status) mutable { + { + // Before calling infer_callback, we must ensure all stream callbacks were called and released (since the + // user may capture some variables
in the callbacks). + auto moved_callback = std::move(original_callback); + moved_callback(status); + } if (HAILO_SUCCESS != status) { state->status = status; @@ -638,7 +644,7 @@ Expected> CoreOp::create_output_stream_from_co const auto max_queue_size = batch_size * MAX_ACTIVE_TRANSFERS_SCALE; auto nms_stream = NmsOutputStream::create(base_stream, layer_info.value(), max_queue_size, - m_core_op_activated_event); + m_core_op_activated_event, stream_params.stream_interface); CHECK_EXPECTED(nms_stream); output_stream = nms_stream.release(); } diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp index 6672c4b3..17f350e9 100644 --- a/hailort/libhailort/src/core_op/core_op.hpp +++ b/hailort/libhailort/src/core_op/core_op.hpp @@ -20,10 +20,10 @@ #define _HAILO_CORE_OP_HPP_ #include "hailo/network_group.hpp" +#include "hailo/device.hpp" #include "common/latency_meter.hpp" -#include "hef/hef_internal.hpp" #include "hef/core_op_metadata.hpp" #include "control_protocol.h" #include "core_op/active_core_op_holder.hpp" @@ -80,7 +80,7 @@ class CoreOp hailo_status activate(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE); hailo_status deactivate(); - // Shutdown the core-op, make sure all ongoing transfers are completed with status HAILO_STREAM_ABORTED_BY_USER + // Shutdown the core-op, make sure all ongoing transfers are completed with status HAILO_STREAM_ABORT virtual hailo_status shutdown() = 0; virtual hailo_status activate_impl(uint16_t dynamic_batch_size = CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE) = 0; @@ -155,7 +155,7 @@ class CoreOp // Launch write_async/read_async on all streams with wrapped callback. // We remove all transfer that was launched successfully from transfers in order to call those callback - // with HAILO_STREAM_ABORTED_BY_USER status on the case of a failure. + // with HAILO_STREAM_ABORT status on the case of a failure. hailo_status infer_async_impl(std::unordered_map &transfers, std::shared_ptr state, TransferDoneCallback done_callback); diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp new file mode 100644 index 00000000..03b360c4 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp @@ -0,0 +1,53 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file action_list_buffer_builder.hpp + * @brief Pure virtual class that represents the basic functions and members for building the action list for the FW. 
+ * Implemented and derived by two different classes: + * ControlActionListBufferBuilder - uses control messages to send Action list to FW + * DDRActionListBufferBuilder (only relevant in hailo1x) - Action list is written to M4 mapped memory in DDR - and read + * from there directly by FW + **/ +#ifndef _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/buffer.hpp" + +#include + +#include "control_protocol.h" + +namespace hailort +{ + +class ActionListBufferBuilder { +public: + enum class Type { + CONTROL, + DDR + }; + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) = 0; + + virtual uint64_t get_mapped_buffer_dma_address() const = 0; + + ActionListBufferBuilder::Type get_builder_type() const { + return m_builder_type; + } +protected: + ActionListBufferBuilder(ActionListBufferBuilder::Type builder_type) : + m_builder_type(builder_type) + {} + virtual ~ActionListBufferBuilder() = default; +private: + const ActionListBufferBuilder::Type m_builder_type; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_ACTION_LIST_BUFFER_BUILDER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp new file mode 100644 index 00000000..92ccf303 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file control_action_list_buffer_builder.cpp + * @brief Class used to build the vector of controls containing the action list content sent to the firmware. 
+ **/ + +#include "control_action_list_buffer_builder.hpp" + +namespace hailort +{ + +ControlActionListBufferBuilder::ControlActionListBufferBuilder() : + ActionListBufferBuilder(ActionListBufferBuilder::Type::CONTROL) +{} + +Expected<std::shared_ptr<ControlActionListBufferBuilder>> ControlActionListBufferBuilder::create() +{ + return make_shared_nothrow<ControlActionListBufferBuilder>(); +} + +hailo_status ControlActionListBufferBuilder::write_action(MemoryView action, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool last_action_buffer_in_context) +{ + (void) last_action_buffer_in_context; + assert(action.size() < std::numeric_limits<uint32_t>::max()); + const uint32_t action_size = static_cast<uint32_t>(action.size()); + const auto should_start_new_control = (is_new_context || !has_space_for_action(action_size)); + + if (should_start_new_control) { + start_new_control(context_type, is_new_context); + } + + auto &control = current_control(); + memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size); + control.context_network_data_length += action_size; + return HAILO_SUCCESS; +} + +CONTROL_PROTOCOL__context_switch_context_info_chunk_t &ControlActionListBufferBuilder::current_control() +{ + assert(!m_controls.empty()); + return m_controls.back(); +} + +bool ControlActionListBufferBuilder::has_space_for_action(uint32_t action_size) +{ + auto &control = current_control(); + return (control.context_network_data_length + action_size) <= CONTROL_PROTOCOL__CONTEXT_NETWORK_DATA_SINGLE_CONTROL_MAX_SIZE; +} + +void ControlActionListBufferBuilder::start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context) +{ + if (!is_new_context) { + current_control().is_last_chunk_per_context = false; + } + + // Creating a new control directly inside the vector to avoid copying the control struct. + m_controls.emplace_back(); + auto &new_control = current_control(); + new_control.context_network_data_length = 0; + new_control.context_type = static_cast<uint8_t>(context_type); + new_control.is_first_chunk_per_context = is_new_context; + new_control.is_last_chunk_per_context = true; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp new file mode 100644 index 00000000..d417df58 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp @@ -0,0 +1,55 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file control_action_list_buffer_builder.hpp + * @brief Class used to build the vector of controls containing the action list content sent to the firmware. + **/ + +#ifndef _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" + +#include "context_switch_defs.h" +#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp" + +#include "vdma/channel/channel_id.hpp" +#include "device_common/control_protocol.hpp" +#include "hef/layer_info.hpp" + + +namespace hailort +{ +
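How a caller might choose between the two ActionListBufferBuilder implementations; the selection logic below is an assumption (mirroring the DDR_ACTION_LIST_ENV_VAR opt-in defined later in this diff), not the actual resource-manager code, and `num_contexts` and `driver` are stand-ins:

    std::shared_ptr<ActionListBufferBuilder> builder;
    if (nullptr != std::getenv("HAILO_DDR_ACTION_LIST")) {
        auto ddr_builder = DDRActionListBufferBuilder::create(num_contexts, driver);
        if (ddr_builder) {
            // FW will read the action list directly from the M4-mapped DDR buffer.
            builder = ddr_builder.release();
        }
    }
    if (nullptr == builder) {
        // Default path - the action list is chunked into control messages.
        auto control_builder = ControlActionListBufferBuilder::create();
        if (control_builder) {
            builder = control_builder.release();
        }
    }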
+// This class manages a vector of CONTROL_PROTOCOL__context_switch_context_info_chunk_t controls to be sent +// to the firmware. Actions are written to the control buffer until we reach the maximum control size, and then we +// start a new control. +class ControlActionListBufferBuilder : public ActionListBufferBuilder { +public: + ControlActionListBufferBuilder(); + static Expected<std::shared_ptr<ControlActionListBufferBuilder>> create(); + virtual ~ControlActionListBufferBuilder() = default; + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) override; + + virtual uint64_t get_mapped_buffer_dma_address() const override { + return CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS; + } + + const std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> &get_controls() const { + return m_controls; + } +private: + CONTROL_PROTOCOL__context_switch_context_info_chunk_t &current_control(); + bool has_space_for_action(uint32_t action_size); + void start_new_control(CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context); + + std::vector<CONTROL_PROTOCOL__context_switch_context_info_chunk_t> m_controls; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_CONTROL_ACTION_LIST_BUFFER_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp new file mode 100644 index 00000000..ee9b179a --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ddr_action_list_buffer_builder.cpp + * @brief Class used to build the action list sent to the firmware through DDR. + **/ + +#include "ddr_action_list_buffer_builder.hpp" + +namespace hailort +{ + +// TODO: HRT-12512 : Can remove these variables when / if continuous buffer comes from designated region +// In hailo15 - the DDR memory range of 0x80000000 - 0x90000000 is mapped to the M4 using a LUT (look up table) to addresses +// 0x50000000 - 0x60000000, Currently this is the range the CMA allocation should come from seeing as this is one of the first CMA allocations +// and the linux cma memory pool according to the hailo15 dtsi is - "alloc-ranges = <0 0x80000000 0 0x40000000>" +// (meaning starts from 0x80000000 and goes for 992 MB) - so anything allocated from 0x90000000 and onward will be outside the mapped area +// The solution to this issue is to create a specific range for this allocation inside the mapped area - seeing as this affects other components +// Like the dsp etc... need to check with them before doing so. For now - this should almost always return an address in the mapped area and we will verify + // to double check
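Back-of-the-envelope for the TODO comment above, using only the figures it quotes: the M4 LUT window covers 256 MiB, while the CMA pool spans 0x40000000 bytes from the same base address, so only allocations landing in the first quarter of the pool are M4-visible - hence the verify_dma_addr() check below:

    constexpr uint64_t M4_WINDOW_SIZE = 0x90000000ULL - 0x80000000ULL; // 0x10000000 = 256 MiB
    constexpr uint64_t CMA_POOL_SIZE  = 0x40000000ULL;                 // per the dtsi alloc-ranges
    static_assert(M4_WINDOW_SIZE < CMA_POOL_SIZE,
        "CMA allocations can land outside the M4-visible window, so the address must be verified");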
+DDRActionListBufferBuilder::DDRActionListBufferBuilder(vdma::ContinuousBuffer &&buffer) : + ActionListBufferBuilder(ActionListBufferBuilder::Type::DDR), + m_action_list_buffer(std::move(buffer)), + m_write_offset(0), + m_current_context_info{} +{} + +bool DDRActionListBufferBuilder::verify_dma_addr(vdma::ContinuousBuffer &buffer) +{ + // Verify that the buffer starts and ends inside the mapped range + if (buffer.dma_address() < CONTEXT_SWITCH_DEFS__START_M4_MAPPED_DDR_ADDRESS || + (buffer.dma_address() + buffer.size() >= CONTEXT_SWITCH_DEFS__END_M4_MAPPED_DDR_ADDRESS)) { + return false; + } + return true; +} + +Expected<std::shared_ptr<DDRActionListBufferBuilder>> DDRActionListBufferBuilder::create(size_t num_contexts, + HailoRTDriver &driver) +{ + // Try to allocate a continuous buffer for the action list in DDR + auto continous_alloc = vdma::ContinuousBuffer::create(num_contexts * + sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t), driver); + + // TODO HRT-12512 - Add fallback to Control if continuous buffer allocation fails + CHECK_EXPECTED(continous_alloc); + // Verify that the continuous buffer is in the allocated region + CHECK_AS_EXPECTED(verify_dma_addr(continous_alloc.value()), HAILO_INTERNAL_FAILURE, + "Failed to allocate continuous buffer in M4 mapped memory region"); + return make_shared_nothrow<DDRActionListBufferBuilder>(continous_alloc.release()); +} + +hailo_status DDRActionListBufferBuilder::write_action(MemoryView action, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, bool is_new_context, bool is_last_action_in_context) +{ + assert(action.size() < std::numeric_limits<uint32_t>::max()); + const uint32_t action_size = static_cast<uint32_t>(action.size()); + + if (is_new_context) { + m_current_context_info.is_first_chunk_per_context = true; + m_current_context_info.is_last_chunk_per_context = true; + m_current_context_info.context_type = static_cast<uint8_t>(context_type); + m_current_context_info.context_network_data_length = 0; + } + + CHECK(m_current_context_info.context_network_data_length + action_size <= + ARRAY_ENTRIES(m_current_context_info.context_network_data), HAILO_INVALID_ARGUMENT, + "Context exceeds maximum context size {}", ARRAY_ENTRIES(m_current_context_info.context_network_data)); + + // TODO HRT-12788 - make more efficient by writing directly to DDR without using the local context_info_single_control_t + memcpy(&(m_current_context_info.context_network_data[m_current_context_info.context_network_data_length]), + action.data(), action_size); + m_current_context_info.context_network_data_length += action_size; + + if (is_last_action_in_context) { + const auto write_size = sizeof(CONTROL_PROTOCOL__context_switch_context_info_chunk_t); + auto status = m_action_list_buffer.write(&m_current_context_info, write_size, m_write_offset); + CHECK_SUCCESS(status); + m_write_offset += write_size; + } + + return HAILO_SUCCESS; +} + +uint64_t DDRActionListBufferBuilder::get_mapped_buffer_dma_address() const +{ + return m_action_list_buffer.dma_address(); +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp new file mode 100644 index 00000000..05b4b214 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp @@ -0,0 +1,43 @@ +/** + * Copyright (c) 2024 Hailo
Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ddr_action_list_buffer_builder.hpp + * @brief Class used to build the action list sent to the firmware through DDR. + **/ +#ifndef _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ +#define _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ + +#include "hailo/hailort.h" +#include "context_switch_defs.h" +#include "core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp" +#include "vdma/memory/continuous_buffer.hpp" + +#define DDR_ACTION_LIST_ENV_VAR ("HAILO_DDR_ACTION_LIST") +#define DDR_ACTION_LIST_ENV_VAR_VALUE ("1") + +namespace hailort +{ + +class DDRActionListBufferBuilder : public ActionListBufferBuilder { +public: + DDRActionListBufferBuilder(vdma::ContinuousBuffer &&buffer); + virtual ~DDRActionListBufferBuilder() = default; + static Expected> create(size_t num_contexts, HailoRTDriver &driver); + + virtual hailo_status write_action(MemoryView action, CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_new_context, bool last_action_buffer_in_context) override; + + virtual uint64_t get_mapped_buffer_dma_address() const override; +private: + vdma::ContinuousBuffer m_action_list_buffer; + // TODO: HRT-12512 : Can remove this check when / if continuous buffer comes from designated region + static bool verify_dma_addr(vdma::ContinuousBuffer &buffer); + size_t m_write_offset; + CONTROL_PROTOCOL__context_switch_context_info_chunk_t m_current_context_info; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_DDR_ACTION_LIST_BUFFER_BUILDER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp index 12385b51..ecc323df 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp @@ -9,8 +9,8 @@ */ #include "core_op/resource_manager/config_buffer.hpp" -#include "vdma/memory/sg_buffer.hpp" -#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/memory/continuous_edge_layer.hpp" #include "vdma/memory/buffer_requirements.hpp" #include @@ -18,7 +18,7 @@ namespace hailort { -Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, +Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes, const uint32_t buffer_size) { auto buffer_ptr = should_use_ccb(driver) ? 
@@ -43,7 +43,7 @@ Expected ConfigBuffer::create(HailoRTDriver &driver, vdma::Channel return ConfigBuffer(buffer_ptr.release(), channel_id, buffer_size); } -ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, +ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size) : m_buffer(std::move(buffer)), m_channel_id(channel_id), @@ -55,7 +55,7 @@ Expected ConfigBuffer::program_descriptors() { // TODO HRT-9657: remove DEVICE interrupts auto descriptors_count = - m_buffer->program_descriptors(m_acc_buffer_offset, vdma::InterruptsDomain::DEVICE, m_acc_desc_count); + m_buffer->program_descriptors(m_acc_buffer_offset, InterruptsDomain::DEVICE, m_acc_desc_count); CHECK_EXPECTED(descriptors_count); m_acc_desc_count += descriptors_count.value(); @@ -71,7 +71,7 @@ hailo_status ConfigBuffer::pad_with_nops() auto page_size = desc_page_size(); auto buffer_size = m_total_buffer_size; auto buffer_residue = buffer_size % page_size; - if (0 != buffer_residue % CCW_HEADER_SIZE) { + if (0 != (page_size - buffer_residue) % CCW_HEADER_SIZE) { LOGGER__ERROR("CFG channel buffer size must be a multiple of CCW header size ({})", CCW_HEADER_SIZE); return HAILO_INTERNAL_FAILURE; } @@ -135,40 +135,56 @@ hailo_status ConfigBuffer::write_inner(const MemoryView &data) return HAILO_SUCCESS; } -Expected> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver, +Expected> ConfigBuffer::create_sg_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes) { - static const bool NOT_CIRCULAR = false; + static const auto NOT_CIRCULAR = false; // For config channels (In Hailo15), the page size must be a multiplication of host default page size. // Therefore we use the flag force_default_page_size for those types of buffers. 
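The pad_with_nops() fix above changes which quantity must be CCW-aligned: the padding added to reach a page boundary, rather than the residue itself. A worked example with hypothetical sizes:

    // Suppose page_size = 512 and m_total_buffer_size = 1000:
    //   buffer_residue = 1000 % 512 = 488
    //   padding        = 512 - 488  = 24
    // The old check tested buffer_residue % CCW_HEADER_SIZE, but it is the 24
    // padding bytes that get filled with NOP CCWs, so the new check tests
    // (page_size - buffer_residue) % CCW_HEADER_SIZE instead.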
- auto const FORCE_DEFAULT_PAGE_SIZE = true; - auto const FORCE_BATCH_SIZE = true; - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers( - driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE); + static const auto FORCE_DEFAULT_PAGE_SIZE = true; + static const auto FORCE_BATCH_SIZE = true; + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_multiple_transfers( + vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), 1, cfg_sizes, NOT_CIRCULAR, + FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE); CHECK_EXPECTED(buffer_size_requirements); const auto page_size = buffer_size_requirements->desc_page_size(); const auto descs_count = buffer_size_requirements->descs_count(); const auto buffer_size = buffer_size_requirements->buffer_size(); - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, page_size, NOT_CIRCULAR, - HailoRTDriver::DmaDirection::H2D, channel_id); + auto buffer = vdma::SgBuffer::create(driver, buffer_size, HailoRTDriver::DmaDirection::H2D); CHECK_EXPECTED(buffer); - auto buffer_ptr = make_unique_nothrow(buffer.release()); + auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + static const auto DEFAULT_OFFSET = 0; + auto edge_layer = vdma::SgEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, driver, descs_count, + page_size, NOT_CIRCULAR, channel_id); + CHECK_EXPECTED(edge_layer); + + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); } -Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, +Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size) { - static const bool NOT_CIRCULAR = false; - static const uint16_t SINGLE_TRANSFER = 1; - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( - SINGLE_TRANSFER, buffer_size, NOT_CIRCULAR); + static const auto NOT_CIRCULAR = false; + // For config channels (In Hailo15), the page size must be a multiplication of host default page size. + // Therefore we use the flag force_default_page_size for those types of buffers. + static const auto FORCE_DEFAULT_PAGE_SIZE = true; + static const auto FORCE_BATCH_SIZE = true; + static const auto DEFAULT_BATCH_SIZE = 1; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE, + buffer_size, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED(buffer_size_requirements); + const auto page_size = buffer_size_requirements->desc_page_size(); + const auto descs_count = buffer_size_requirements->descs_count(); auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); /* Don't print error here since this might be expected error that the libhailoRT can recover from (out of host memory). 
If it's not the case, there is a print in hailort_driver.cpp file */ @@ -178,10 +194,17 @@ Expected> ConfigBuffer::create_ccb_buffer(Hail CHECK_EXPECTED(buffer); } - auto buffer_ptr = make_unique_nothrow(buffer.release()); + auto buffer_ptr = make_shared_nothrow(buffer.release()); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + static const auto DEFAULT_OFFSET = 0; + auto edge_layer = vdma::ContinuousEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, page_size, descs_count); + CHECK_EXPECTED(edge_layer); + + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); } bool ConfigBuffer::should_use_ccb(HailoRTDriver &driver) diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp index 534bab52..45695216 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp @@ -13,7 +13,7 @@ #include "hailo/buffer.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" namespace hailort { @@ -48,20 +48,20 @@ class ConfigBuffer final CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; private: - ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size); + ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size); hailo_status write_inner(const MemoryView &data); - static Expected> create_sg_buffer(HailoRTDriver &driver, + static Expected> create_sg_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes); - static Expected> create_ccb_buffer(HailoRTDriver &driver, + static Expected> create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size); - static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, + static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes, const uint32_t buffer_size); static bool should_use_ccb(HailoRTDriver &driver); - std::unique_ptr m_buffer; + std::unique_ptr m_buffer; vdma::ChannelId m_channel_id; const size_t m_total_buffer_size; size_t m_acc_buffer_offset; diff --git a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp deleted file mode 100644 index 5684abc6..00000000 --- a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.cpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file context_switch_buffer_builder.cpp - * @brief Class used to build the context switch buffer sent to the firmware - **/ - -#include "context_switch_buffer_builder.hpp" - -namespace hailort -{ - -ContextSwitchBufferBuilder::ContextSwitchBufferBuilder(CONTROL_PROTOCOL__context_switch_context_type_t context_type) : - m_context_type(context_type) -{ - // Initialize first control - start_new_control(); -} - -void ContextSwitchBufferBuilder::write_action(MemoryView action) -{ - assert(action.size() < std::numeric_limits::max()); - const uint32_t action_size = static_cast(action.size()); - - if (!has_space_for_action(action_size)) { - // Size exceeded single control size, creating a new control buffer. - start_new_control(); - } - - auto &control = current_control(); - memcpy(&control.context_network_data[control.context_network_data_length], action.data(), action_size); - control.context_network_data_length += action_size; -} - -const std::vector &ContextSwitchBufferBuilder::get_controls() const -{ - return m_controls; -} - -const CONTROL_PROTOCOL__context_switch_context_type_t &ContextSwitchBufferBuilder::get_context_type() const -{ - return m_context_type; -} - -CONTROL_PROTOCOL__context_switch_context_info_single_control_t &ContextSwitchBufferBuilder::current_control() -{ - assert(!m_controls.empty()); - return m_controls.back(); -} - -bool ContextSwitchBufferBuilder::has_space_for_action(uint32_t action_size) -{ - auto &control = current_control(); - return (control.context_network_data_length + action_size) <= ARRAY_ENTRIES(control.context_network_data); -} - -void ContextSwitchBufferBuilder::start_new_control() -{ - if (!m_controls.empty()) { - current_control().is_last_control_per_context = false; - } - - // Creating a new control directly inside the vector to avoid copying the control struct. - m_controls.emplace_back(); - auto &new_control = current_control(); - new_control.context_network_data_length = 0; - new_control.context_type = static_cast(m_context_type); - new_control.is_first_control_per_context = (1 == m_controls.size()); - new_control.is_last_control_per_context = true; -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp deleted file mode 100644 index a121761c..00000000 --- a/hailort/libhailort/src/core_op/resource_manager/context_switch_buffer_builder.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file context_switch_buffer_builder.hpp - * @brief Class used to build the context switch buffer sent to the firmware. - **/ - -#ifndef _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ -#define _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ - -#include "hailo/hailort.h" - -#include "vdma/channel/channel_id.hpp" -#include "device_common/control_protocol.hpp" -#include "hef/layer_info.hpp" - - -namespace hailort -{ - -// This class manages a vector of CONTROL_PROTOCOL__context_switch_context_info_single_control_t controls to be sent -// to the firmware. Actions are written to the control buffer, until we reach the maximum control size, then we will -// start a new control. 
-class ContextSwitchBufferBuilder final { -public: - ContextSwitchBufferBuilder(CONTROL_PROTOCOL__context_switch_context_type_t context_type); - - void write_action(MemoryView action); - const std::vector &get_controls() const; - const CONTROL_PROTOCOL__context_switch_context_type_t &get_context_type() const; - -private: - CONTROL_PROTOCOL__context_switch_context_info_single_control_t ¤t_control(); - bool has_space_for_action(uint32_t action_size); - void start_new_control(); - - CONTROL_PROTOCOL__context_switch_context_type_t m_context_type; - std::vector m_controls; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_CONTEXT_SWITCH_BUFFER_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp index 59be948b..77b2e801 100644 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp @@ -10,35 +10,33 @@ #include "intermediate_buffer.hpp" #include "core_op/resource_manager/resource_manager.hpp" -#include "vdma/memory/sg_buffer.hpp" -#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/memory/continuous_edge_layer.hpp" #include "vdma/memory/buffer_requirements.hpp" namespace hailort { -Expected> IntermediateBuffer::create_buffer(HailoRTDriver &driver, uint32_t transfer_size, +Expected> IntermediateBuffer::create_edge_layer( + std::shared_ptr &&buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) { const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS); - auto buffer_exp = should_use_ccb(driver, streaming_type) ? - create_ccb_buffer(driver, transfer_size, max_batch_size, is_circular) : - create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); + auto buffer_exp = (vdma::VdmaBuffer::Type::CONTINUOUS == buffer->type()) ? + create_ccb_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, is_circular) : + create_sg_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); - if (should_use_ccb(driver, streaming_type) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_exp.status())) { - /* Try to use sg buffer instead */ - return create_sg_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, is_circular); - } else { - return buffer_exp; - } + return buffer_exp; } Expected IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type) + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, + std::shared_ptr &&buffer, size_t buffer_offset) { - auto buffer_exp = create_buffer(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type); - CHECK_EXPECTED(buffer_exp); - auto buffer_ptr = buffer_exp.release(); + auto edge_layer_exp = create_edge_layer(std::move(buffer), buffer_offset, driver, transfer_size, max_batch_size, + d2h_channel_id, streaming_type); + CHECK_EXPECTED(edge_layer_exp); + auto edge_layer_ptr = edge_layer_exp.release(); if (streaming_type == StreamingType::BURST) { // We have max_batch_size transfers, so we program them one by one. 
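The reworked IntermediateBuffer::create() above now receives its backing buffer from the outside instead of allocating one itself. A sketch of a call site, where `get_internal_buffer()` is a hypothetical helper and the size parameters are assumptions:

    std::shared_ptr<vdma::VdmaBuffer> base_buffer = get_internal_buffer(); // allocated and owned elsewhere
    auto intermediate = IntermediateBuffer::create(driver, transfer_size, max_batch_size,
        d2h_channel_id, IntermediateBuffer::StreamingType::BURST,
        std::move(base_buffer), /*buffer_offset=*/0);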
The last transfer should report interrupt @@ -46,31 +44,31 @@ Expected IntermediateBuffer::create(HailoRTDriver &driver, u size_t acc_offset = 0; for (uint16_t i = 0; i < max_batch_size; i++) { const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ? - vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; - auto desc_count_local = buffer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset); + InterruptsDomain::DEVICE : InterruptsDomain::NONE; + auto desc_count_local = edge_layer_ptr->program_descriptors(transfer_size, last_desc_interrupts_domain, acc_offset); CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big."); acc_offset += desc_count_local.value(); } } else { // Program all descriptors, no need for interrupt. - const auto interrupts_domain = vdma::InterruptsDomain::NONE; - const auto total_size = buffer_ptr->descs_count() * buffer_ptr->desc_page_size(); - auto desc_count_local = buffer_ptr->program_descriptors(total_size, interrupts_domain, 0); + const auto interrupts_domain = InterruptsDomain::NONE; + const auto total_size = edge_layer_ptr->descs_count() * edge_layer_ptr->desc_page_size(); + auto desc_count_local = edge_layer_ptr->program_descriptors(total_size, interrupts_domain, 0); CHECK_EXPECTED(desc_count_local); } - return IntermediateBuffer(std::move(buffer_ptr), transfer_size, max_batch_size); + return IntermediateBuffer(std::move(edge_layer_ptr), transfer_size, max_batch_size); } Expected IntermediateBuffer::read() { const auto size = m_transfer_size * m_dynamic_batch_size; - assert(size <= m_buffer->size()); + assert(size <= m_edge_layer->size()); auto res = Buffer::create(size); CHECK_EXPECTED(res); - auto status = m_buffer->read(res->data(), size, 0); + auto status = m_edge_layer->read(res->data(), size, 0); CHECK_SUCCESS_AS_EXPECTED(status); return res.release(); @@ -78,94 +76,65 @@ Expected IntermediateBuffer::read() CONTROL_PROTOCOL__host_buffer_info_t IntermediateBuffer::get_host_buffer_info() const { - return m_buffer->get_host_buffer_info(m_transfer_size); + return m_edge_layer->get_host_buffer_info(m_transfer_size); } -IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, +IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&edge_layer, uint32_t transfer_size, uint16_t batch_size) : - m_buffer(std::move(buffer)), + m_edge_layer(std::move(edge_layer)), m_transfer_size(transfer_size), m_dynamic_batch_size(batch_size) {} -Expected> IntermediateBuffer::create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular) +Expected> IntermediateBuffer::create_sg_edge_layer( + std::shared_ptr &&buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, + uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular) { - auto const DONT_FORCE_DEFAULT_PAGE_SIZE = false; - auto const FORCE_BATCH_SIZE = true; - auto const IS_VDMA_ALIGNED_BUFFER = true; - auto buffer_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( - driver.desc_max_page_size(), batch_size, batch_size, transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, - FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + auto buffer_requirements = 
vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), batch_size, batch_size, transfer_size, + is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); CHECK_EXPECTED(buffer_requirements); const auto desc_page_size = buffer_requirements->desc_page_size(); const auto descs_count = buffer_requirements->descs_count(); const auto buffer_size = buffer_requirements->buffer_size(); - auto buffer = vdma::SgBuffer::create(driver, buffer_size, descs_count, desc_page_size, is_circular, - HailoRTDriver::DmaDirection::BOTH, d2h_channel_id); - CHECK_EXPECTED(buffer); + auto edge_layer = vdma::SgEdgeLayer::create(std::dynamic_pointer_cast(buffer), buffer_size, + buffer_offset, driver, descs_count, desc_page_size, is_circular, d2h_channel_id); + CHECK_EXPECTED(edge_layer); - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr(std::move(buffer_ptr)); + return std::unique_ptr(std::move(edge_layer_ptr)); } -Expected> IntermediateBuffer::create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, bool is_circular) +Expected> IntermediateBuffer::create_ccb_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular) { - auto buffer_size_requirements = vdma::BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer( - batch_size, transfer_size, is_circular); - CHECK_EXPECTED(buffer_size_requirements); - - auto buffer = vdma::ContinuousBuffer::create(buffer_size_requirements->buffer_size(), driver); - /* Don't print error here since this might be expected error that the libhailoRT can recover from - (out of host memory). 
If it's not the case, there is a print in hailort_driver.cpp file */ - if (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer.status()) { - return make_unexpected(buffer.status()); - } else { - CHECK_EXPECTED(buffer); - } + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; - auto buffer_ptr = make_unique_nothrow(buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto buffer_size_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), batch_size, batch_size, transfer_size, + is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + CHECK_EXPECTED(buffer_size_requirements); - return std::unique_ptr(std::move(buffer_ptr)); -} + const auto page_size = buffer_size_requirements->desc_page_size(); + const auto descs_count = buffer_size_requirements->descs_count(); + const auto buffer_size = buffer_size_requirements->buffer_size(); -bool IntermediateBuffer::should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type) -{ - if (driver.dma_type() == HailoRTDriver::DmaType::PCIE) { - // CCB not supported on PCIe - return false; - } + auto edge_layer = vdma::ContinuousEdgeLayer::create(std::dynamic_pointer_cast(buffer), + buffer_size, buffer_offset, page_size, descs_count); + CHECK_EXPECTED(edge_layer); - switch (streaming_type) { - case StreamingType::BURST: - // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently - // don't want to use CCB by default. - if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { - LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance.\n"); - return false; - } else { - return true; - } - case StreamingType::CIRCULAR_CONTINUOS: - // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, - // therefore the CCB is the default behaviour. - // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers. - if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) { - LOGGER__INFO("Using Non default buffer type (CCB instead of DESC) for ddr channel. 
\n"); - return true; - } else { - return false; - } - } + auto edge_layer_ptr = make_unique_nothrow(edge_layer.release()); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - // Shouldn't reach here - assert(false); - return false; + return std::unique_ptr(std::move(edge_layer_ptr)); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp index aebf2ab5..8661cafa 100644 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp @@ -13,8 +13,8 @@ #include "hailo/expected.hpp" #include "hailo/buffer.hpp" -#include "os/hailort_driver.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" #include "control_protocol.h" @@ -34,24 +34,25 @@ class IntermediateBuffer final { }; static Expected create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, + std::shared_ptr &&buffer, size_t buffer_offset); Expected read(); CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; private: - IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size); - - static Expected> create_sg_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular); - static Expected> create_ccb_buffer(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t batch_size, bool is_circular); - static Expected> create_buffer(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type); - - static bool should_use_ccb(HailoRTDriver &driver, StreamingType streaming_type); - - std::unique_ptr m_buffer; + IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, uint16_t batch_size); + + static Expected> create_sg_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, + vdma::ChannelId d2h_channel_id, bool is_circular); + static Expected> create_ccb_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular); + static Expected> create_edge_layer(std::shared_ptr &&buffer, + size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size, + vdma::ChannelId d2h_channel_id, StreamingType streaming_type); + + std::unique_ptr m_edge_layer; const uint32_t m_transfer_size; uint16_t m_dynamic_batch_size; }; diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp new file mode 100644 index 00000000..3dd26db8 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp @@ -0,0 +1,327 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_manager.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The manager will hold all the internal buffers of the CoreOp. 
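An assumed end-to-end usage of the new manager, based only on the API in this file - register each edge layer, then let the planner run (see plan_and_execute() below, which walks an ordered fallback chain of planner types); `context_edge_layers`, `default_planner_type` and `number_of_contexts` are stand-ins:

    TRY(auto buffer_manager, InternalBufferManager::create(driver, config_params));
    for (const auto &layer_info : context_edge_layers) {
        auto status = buffer_manager->add_layer_buffer_info(layer_info);
        if (HAILO_SUCCESS != status) {
            return status;
        }
    }
    auto status = buffer_manager->plan_and_execute(default_planner_type, number_of_contexts);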
+ * The manager can optimize the memory consumption of the core op and provide an API + * for querying the total internal memory consumption. + * + **/ + +#include "internal_buffer_manager.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/sg_buffer.hpp" +#include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/buffer_requirements.hpp" + + +#include <numeric> + +namespace hailort +{ + +// Macro that checks status. If status is HAILO_OUT_OF_HOST_CMA_MEMORY, return without printing an error to the prompt. +#define CHECK_EXPECTED_OUT_OF_CMA_MEMORY(type) if (HAILO_OUT_OF_HOST_CMA_MEMORY == (type).status()) {return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);} CHECK_SUCCESS(type); + +Expected<std::shared_ptr<InternalBufferManager>> InternalBufferManager::create(HailoRTDriver &driver, + const ConfigureNetworkParams &config_params) +{ + + auto buffer_manager_ptr = make_shared_nothrow<InternalBufferManager>(InternalBufferManager(driver, config_params)); + CHECK_NOT_NULL_AS_EXPECTED(buffer_manager_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return buffer_manager_ptr; +} + +InternalBufferManager::InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params) + : m_driver(driver), + m_config_params(config_params), + m_edge_layer_infos(), + m_edge_layer_to_buffer_map() + {} + + +void InternalBufferManager::add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info) +{ + m_edge_layer_infos.emplace(edge_layer_key, buffer_info); +} + +Expected<uint16_t> InternalBufferManager::get_network_batch_size(const std::string &network_name) const +{ + for (auto const &network_map : m_config_params.network_params_by_name) { + auto const network_name_from_params = network_map.first; + if (network_name_from_params == network_name) { + auto actual_batch_size = network_map.second.batch_size; + if (HAILO_DEFAULT_BATCH_SIZE == actual_batch_size) { + actual_batch_size = DEFAULT_ACTUAL_BATCH_SIZE; + } + return actual_batch_size; + } + } + + LOGGER__ERROR("Failed to find network with network name {}", network_name); + + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status InternalBufferManager::add_inter_context_buffer(const LayerInfo &layer_info) +{ + // This API gets the inter context input Layer, but the key is the output layer. + // The reason is that there is one output edge layer and multiple input edge layers. + // We must get the info of all the inputs in order to set the right start and end contexts, + // but the key must be the output (from the connected context info). + + // layer_info.connected_context_info.context_index == start context + // layer_info.context_index == end context + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); + TRY(auto batch_size, get_network_batch_size(layer_info.network_name)); + static const bool BUFFER_REUSE = true; + + auto edge_layer_key = + std::make_pair(layer_info.connected_context_info.context_index, layer_info.connected_context_info.stream_index); + // First check if there is a key (for the case of one output with multiple inputs). + + const auto it = m_edge_layer_infos.find(edge_layer_key); + if (it != m_edge_layer_infos.end()) { + CHECK(it->second.transfer_size == transfer_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different transfer size"); + CHECK(it->second.max_transfers_in_batch == batch_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different batch size"); + // Now if the new end context is bigger than the old one, update it.
+ if (it->second.end_context < layer_info.context_index) { + it->second.end_context = layer_info.context_index; + } + } else { + LOGGER__DEBUG("Adding edge layer with key ({}, {}) to the internal buffer manager", edge_layer_key.first, edge_layer_key.second); + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + transfer_size, + batch_size, + layer_info.connected_context_info.context_index, + layer_info.context_index, + BUFFER_REUSE}); + } + return HAILO_SUCCESS; +} + +hailo_status InternalBufferManager::add_ddr_buffer(const LayerInfo &layer_info) +{ + // In DDR - always use core bytes per buffer as row size + const auto row_size = static_cast(layer_info.nn_stream_config.core_bytes_per_buffer); + const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; + static auto const BUFFER_REUSE = true; + auto edge_layer_key = std::make_pair(layer_info.context_index, layer_info.stream_index); + + auto it = m_edge_layer_infos.find(edge_layer_key); + CHECK(it == m_edge_layer_infos.end(), HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key for DDR layer. This is not supported."); + + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + row_size, + min_buffered_rows, + layer_info.context_index, + layer_info.connected_context_info.context_index, + BUFFER_REUSE}); + + return HAILO_SUCCESS; +} + +// For edge layers +hailo_status InternalBufferManager::add_layer_buffer_info(const LayerInfo &layer_info) +{ + switch (layer_info.type) { + case LayerType::INTER_CONTEXT: + return add_inter_context_buffer(layer_info); + case LayerType::DDR: + return add_ddr_buffer(layer_info); + default: + LOGGER__ERROR("Unsupported layer type for InternalBufferManager"); + return HAILO_INTERNAL_FAILURE; + } +} + +hailo_status InternalBufferManager::add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, + const std::vector &cfg_sizes) +{ + static const bool NO_REUSE = false; + static const auto SINGLE_TRANSFER_PER_BATCH = 1; + auto edge_layer_key = std::make_pair(static_cast(context_index), static_cast(MAX_EDGE_LAYERS_PER_CONTEXT + config_stream_index)); + const auto buffer_size = static_cast(std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0)); + add_buffer_info(edge_layer_key, + EdgeLayerInfo{ + LayerType::CFG, + buffer_size, + SINGLE_TRANSFER_PER_BATCH, + context_index, + context_index, + NO_REUSE}); + + return HAILO_SUCCESS; +} + +Expected> InternalBufferManager::create_intermediate_sg_buffer( + const size_t buffer_size) +{ + auto buffer = vdma::SgBuffer::create(m_driver, buffer_size, HailoRTDriver::DmaDirection::BOTH); + CHECK_EXPECTED(buffer); + + auto buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(buffer_ptr)); +} + +Expected> InternalBufferManager::create_intermediate_ccb_buffer( + const size_t buffer_size) +{ + auto buffer = vdma::ContinuousBuffer::create(buffer_size, m_driver); + CHECK_EXPECTED_OUT_OF_CMA_MEMORY(buffer); + + auto buffer_ptr = make_shared_nothrow(buffer.release()); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(buffer_ptr)); +} + +Expected> InternalBufferManager::create_intermediate_buffer( + vdma::VdmaBuffer::Type &buffer_type, const size_t buffer_size) +{ + if (vdma::VdmaBuffer::Type::CONTINUOUS == buffer_type) { + return create_intermediate_ccb_buffer(buffer_size); + } + return create_intermediate_sg_buffer(buffer_size); +} + +void 
InternalBufferManager::print_execution_results(const BufferPlanReport &default_planner_report, + bool default_planner_meet_requirements, const BufferPlanReport &executed_buffers_report) +{ + if (!default_planner_meet_requirements) { + LOGGER__INFO("Default internal buffer planner failed to meet requirements"); + } else { + LOGGER__INFO("Planned internal buffer memory: CMA memory {}, user memory {}. Memory to edge layer usage factor is {}", + default_planner_report.cma_memory, default_planner_report.user_memory, default_planner_report.memory_utilization_factor); + } + + auto default_plan_executed = (default_planner_report.cma_memory == executed_buffers_report.cma_memory) && + (default_planner_report.user_memory == executed_buffers_report.user_memory); + + if (default_plan_executed) { + LOGGER__INFO("Default internal buffer planner executed successfully"); + } else { + LOGGER__INFO("Executed internal buffer memory: CMA memory {}, user memory {}. Memory to edge layer usage factor is {}", + executed_buffers_report.cma_memory, executed_buffers_report.user_memory, executed_buffers_report.memory_utilization_factor); + } +} + +hailo_status InternalBufferManager::plan_and_execute(InternalBufferPlanner::Type default_planner_type, + const size_t number_of_contexts) +{ + // Create buffer planning + auto planner_type = default_planner_type; + // Copy of the initial edge layers + auto edge_layers = m_edge_layer_infos; + // Vector of executed buffers from the planning + InternalBufferPlanning buffers_executed; + // Default planner report + BufferPlanReport default_planner_report {}; + bool default_planner_meet_requirements = false; + + while (!edge_layers.empty()) { + CHECK(InternalBufferPlanner::Type::INVALID != planner_type, HAILO_CANT_MEET_BUFFER_REQUIREMENTS, + "Cannot find an executable buffer planning for the given edge layers"); + + LOGGER__DEBUG("Trying to plan with planner type {}", static_cast(planner_type)); + auto buffer_planning_exp = InternalBufferPlanner::create_buffer_planning(edge_layers, planner_type, + m_driver.dma_type(), m_driver.desc_max_page_size(), number_of_contexts); + if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_planning_exp.status()) { + // If the planner failed, try the next planner + LOGGER__DEBUG("Can't plan with planner type {}", static_cast(planner_type)); + planner_type = static_cast((static_cast(planner_type)) + 1); + continue; + } + auto buffer_planning = buffer_planning_exp.release(); + + if (planner_type == default_planner_type) { + default_planner_meet_requirements = true; + default_planner_report = InternalBufferPlanner::report_planning_info(buffer_planning); + } + + std::vector edge_layers_executed; + auto status = execute_plan(buffer_planning, edge_layers_executed, buffers_executed); + // Don't return an error if out of host CMA memory; try the next plan. + if (HAILO_OUT_OF_HOST_CMA_MEMORY != status) { + CHECK_SUCCESS(status); + } + + // Remove executed edge layers from the remaining edge layers + for (const auto &edge_layer_key : edge_layers_executed) { + edge_layers.erase(edge_layer_key); + } + + if (!edge_layers.empty()) { + LOGGER__DEBUG("Execute of plan type {} didn't finish. Moving to next planner", static_cast(planner_type)); + } else { + LOGGER__DEBUG("Execute finished successfully"); + } + // Move to the next planner + planner_type = static_cast((static_cast(planner_type)) + 1); + }
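+ // [Editor's note - hedged summary] The fallback above simply walks the InternalBufferPlanner::Type
+ // enum in declaration order, so the escalation is:
+ //
+ //     SINGLE_BUFFER_PER_BUFFER_TYPE -> SINGLE_SG_BUFFER -> NAIVE_PER_BUFFER_TYPE -> NAIVE_SG_BUFFER -> INVALID
+ //
+ // Each round keeps whatever buffers the previous round managed to allocate and replans only the
+ // edge layers that are still left, which is why executed keys are erased before retrying.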
+ + const auto executed_buffers_report = InternalBufferPlanner::report_planning_info(buffers_executed); + + print_execution_results(default_planner_report, default_planner_meet_requirements, executed_buffers_report); + + return HAILO_SUCCESS; +} + +hailo_status InternalBufferManager::execute_plan(InternalBufferPlanning &buffer_planning, + std::vector &edge_layers_executed, InternalBufferPlanning &buffers_executed) +{ + // Verify no buffers were allocated yet + assert(m_edge_layer_to_buffer_map.empty()); + + auto execution_status = HAILO_SUCCESS; + + // Go over the plan and create the buffers + for (auto &buffer_plan : buffer_planning) { + auto buffer_ptr = create_intermediate_buffer(buffer_plan.buffer_type, buffer_plan.buffer_size); + if (buffer_ptr.status() == HAILO_OUT_OF_HOST_CMA_MEMORY) { + execution_status = buffer_ptr.status(); + // If one of the buffers failed due to lack of memory, try to move on to the next buffer. + continue; + } + for (const auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { + m_edge_layer_to_buffer_map.emplace( + edge_layer_offset.first, + EdgeLayerToBufferMap{buffer_ptr.value(), edge_layer_offset.second}); + } + // Add the edge layers to the executed list + for (const auto &edge_layer_info : buffer_plan.edge_layer_infos) { + edge_layers_executed.emplace_back(edge_layer_info.first); + } + + // Add the buffer to the executed list + buffers_executed.emplace_back(buffer_plan); + } + + return execution_status; +} + +Expected InternalBufferManager::get_intermediate_buffer(const EdgeLayerKey &key) +{ + const auto buffer_it = m_edge_layer_to_buffer_map.find(key); + if (std::end(m_edge_layer_to_buffer_map) == buffer_it) { + return make_unexpected(HAILO_NOT_FOUND); + } + + return Expected(buffer_it->second); +} + +} /* namespace hailort */
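[Editor's note] Taken together, the new manager is driven in three phases: register edge layer info, plan and allocate, then query the resulting mappings. A minimal usage sketch (hypothetical call site; driver, config_params, layer_info, num_contexts, src_context and src_stream are assumed to exist, and error handling is elided):

    // Illustration only - not part of this patch.
    auto manager = InternalBufferManager::create(driver, config_params).release();
    auto status = manager->add_layer_buffer_info(layer_info);   // INTER_CONTEXT or DDR edge layer
    status = manager->plan_and_execute(InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, num_contexts);
    auto mapping = manager->get_intermediate_buffer({src_context, src_stream});
    // mapping->buffer and mapping->offset locate this edge layer inside the shared buffer.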
diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp new file mode 100644 index 00000000..5a93200c --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp @@ -0,0 +1,76 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_manager.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The manager will hold all the internal buffers of the CoreOp. + * The manager can optimize the memory consumption of the core op and provide an API + * for reporting the total internal memory consumption. + * + **/ + +#ifndef _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ +#define _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/hef.hpp" +#include "common/utils.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/vdma_buffer.hpp" +#include "internal_buffer_planner.hpp" + + +namespace hailort +{ + +#define MAX_EDGE_LAYERS_PER_CONTEXT (20) + +class InternalBufferManager final +{ +public: + static Expected> create(HailoRTDriver &driver, + const ConfigureNetworkParams &config_params); + + hailo_status add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, + const std::vector &cfg_sizes); + hailo_status add_layer_buffer_info(const LayerInfo &layer_info); + Expected get_intermediate_buffer(const EdgeLayerKey &key); + hailo_status plan_and_execute(InternalBufferPlanner::Type default_planner_type, const size_t number_of_contexts); +private: + + // Add buffer info phase functions + void add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info); + hailo_status add_inter_context_buffer(const LayerInfo &layer_info); + hailo_status add_ddr_buffer(const LayerInfo &layer_info); + Expected get_network_batch_size(const std::string &network_name) const; + + // Execute phase functions + hailo_status execute_plan(InternalBufferPlanning &buffer_planning, + std::vector &edge_layers_executed, InternalBufferPlanning &buffers_executed); + Expected> create_intermediate_buffer( + vdma::VdmaBuffer::Type &buffer_type, const size_t buffer_size); + Expected> create_intermediate_ccb_buffer( + const size_t buffer_size); + Expected> create_intermediate_sg_buffer( + const size_t buffer_size); + + // Reporting functions + void print_execution_results(const BufferPlanReport &default_planner_report, + bool default_planner_meet_requirements, const BufferPlanReport &executed_buffers_report); + + HailoRTDriver &m_driver; + const ConfigureNetworkParams &m_config_params; + // m_edge_layer_infos is filled by the add_buffer_info API + std::map m_edge_layer_infos; + + std::map m_edge_layer_to_buffer_map; + + InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp new file mode 100644 index 00000000..bf369481 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp @@ -0,0 +1,403 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_planner.cpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The planner holds the algorithms that plan the connections between buffers and edge layers + * + **/ + +#include "vdma/memory/buffer_requirements.hpp" +#include "internal_buffer_planner.hpp" + +#include + +constexpr size_t NAIVE_PLANNING_EDGE_LAYER_OFFSET = 0; + +// Macros that check status. If the status is HAILO_CANT_MEET_BUFFER_REQUIREMENTS, return without printing an error to the log.
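+// [Editor's note - usage sketch] These macros follow the codebase's Expected/status conventions;
+// a typical call site (as in create_naive_buffer_planning below) looks like:
+//
+//     const auto buffer_requirements = return_buffer_requirements(edge_layer_info, buffer_type, max_page_size);
+//     CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements);  // silent propagation; the caller falls back
+//
+// so HAILO_CANT_MEET_BUFFER_REQUIREMENTS propagates quietly and plan_and_execute() can try the next planner.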
+#define CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(type) if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == type.status()) {return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);} CHECK_SUCCESS(type); +#define CHECK_STATUS_CANT_MEET_REQUIREMENTS(status) if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == status) {return make_unexpected(status);} CHECK_SUCCESS(status); + +namespace hailort +{ + +bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, + bool force_sg_buffer_type) +{ + if (HailoRTDriver::DmaType::PCIE == dma_type) { + // CCB not supported on PCIe + return false; + } + + if (force_sg_buffer_type) { + return false; + } + + switch (layer_type) { + case LayerType::INTER_CONTEXT: + // On burst (aka inter-context), because the buffers are big (And depends on the max_batch_size), we currently + // don't want to use CCB by default. + if (nullptr != std::getenv("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for inter context channels is not optimal for performance."); + return false; + } else { + return true; + } + case LayerType::DDR: + // On circular_continuous (aka ddr), the buffers are relatively small and we want to verify the C2C mechanism, + // therefore the CCB is the default behaviour. + // Due to request from the DFC group (Memory issues) - DDR buffers would run over DESC and not CCB buffers. + if (nullptr != std::getenv("HAILO_FORCE_DDR_CHANNEL_OVER_CCB")) { + LOGGER__WARNING("Using Non default buffer type (CCB instead of DESC) for ddr channel."); + return true; + } else { + return false; + } + case LayerType::CFG: + if (nullptr != std::getenv("HAILO_FORCE_CONF_CHANNEL_OVER_DESC")) { + LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance."); + return false; + } + else { + return true; + } + default: + // Shouldn't reach here + assert(false); + return false; + } +} + +Expected InternalBufferPlanner::create_naive_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, bool force_sg_buffer_type) +{ + InternalBufferPlanning buffer_planning; + + // Sort edge layers by size - Start with the biggest buffer + auto sorted_edge_layer_vector = sort_edge_layers_by_size(edge_layer_infos); + for (const auto &edge_layer_info : sorted_edge_layer_vector) { + // Naive planning - Buffer holds only one transfer pattern and one edge layer + std::vector> edge_layer_offsets; + std::map plan_edge_layer_infos; + plan_edge_layer_infos.emplace(edge_layer_info.first, edge_layer_info.second); + edge_layer_offsets.emplace_back(edge_layer_info.first, NAIVE_PLANNING_EDGE_LAYER_OFFSET); + vdma::VdmaBuffer::Type buffer_type = should_edge_layer_use_ccb(edge_layer_info.second.type, dma_type, force_sg_buffer_type) ? 
+ vdma::VdmaBuffer::Type::CONTINUOUS : vdma::VdmaBuffer::Type::SCATTER_GATHER; + const auto buffer_requirements = return_buffer_requirements(edge_layer_info.second, buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + buffer_planning.emplace_back( + BufferPlan{ + buffer_type, + buffer_requirements->buffer_size(), + buffer_requirements->buffer_size(), + edge_layer_offsets, + plan_edge_layer_infos}); + } + return buffer_planning; +} + +std::vector> InternalBufferPlanner::sort_edge_layers_by_size( + const std::map &edge_layers) +{ + std::vector> sorted_edge_layers; + std::copy(edge_layers.begin(), edge_layers.end(), std::back_inserter>>(sorted_edge_layers)); + std::sort(sorted_edge_layers.begin(), sorted_edge_layers.end(), + [](const std::pair &a, const std::pair &b) { + return a.second.transfer_size > b.second.transfer_size; + }); + return sorted_edge_layers; +} + +Expected InternalBufferPlanner::return_buffer_requirements(const EdgeLayerInfo &edge_layer, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + // Calc actual size + static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + static const auto FORCE_BATCH_SIZE = true; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + const auto is_circular = (LayerType::DDR == edge_layer.type); + auto buffer_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + buffer_type, max_page_size, edge_layer.max_transfers_in_batch, + edge_layer.max_transfers_in_batch, edge_layer.transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, + FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + return buffer_requirements; +} + +ContextBufferUsageSegments InternalBufferPlanner::merge_context_buffer_events( + ContextBufferUsageSegments& combined, const ContextBufferUsageSegments& added_buffers) +{ + // Combine the two vectors into one + combined.insert(combined.end(), added_buffers.begin(), added_buffers.end()); + + // Sort the combined vector by offset + std::sort(combined.begin(), combined.end(), [](const BufferUsageSegment& a, const BufferUsageSegment& b) { + return a.offset < b.offset; + }); + + // Merge overlapping buffers + ContextBufferUsageSegments merged; + for (const auto& buffer : combined) { + if (!merged.empty() && (merged.back().offset + merged.back().size >= buffer.offset)) { + // If the current buffer overlaps with the last buffer in the merged list, + // extend the size of the last buffer to include the current buffer + merged.back().size = std::max(merged.back().size, buffer.offset + buffer.size - merged.back().offset); + } else { + // If the current buffer does not overlap with the last buffer in the merged list, + // add it to the list + merged.push_back(buffer); + } + } + + return merged; +} + +size_t InternalBufferPlanner::find_new_buffer_offset(const ContextBufferUsageSegments& unified_buffers, size_t new_buffer_size, + uint16_t buffer_offset_alignment) +{ + // Try to find a gap in the list that is large enough to hold the new buffer + // If first buffer starts after 0, check the gap at the beginning of the list + const auto aligned_first_buffer_offset = + !unified_buffers.empty() ? (DIV_ROUND_DOWN(unified_buffers[0].offset, buffer_offset_alignment) * buffer_offset_alignment) : 0; + if (!unified_buffers.empty() && aligned_first_buffer_offset >= new_buffer_size) { + return 0; + } + + const auto max_size = unified_buffers.empty() ? 
0 : unified_buffers.back().offset + unified_buffers.back().size; + const auto aligned_max_size = DIV_ROUND_UP(max_size, buffer_offset_alignment) * buffer_offset_alignment; + for (auto it = unified_buffers.begin(); it != unified_buffers.end(); ++it) { + const auto aligned_end_of_buffer = DIV_ROUND_UP((it->offset + it->size), buffer_offset_alignment) * buffer_offset_alignment; + // Calculate the gap between the current buffer and the next buffer + size_t gap = ((it + 1 != unified_buffers.end()) ? ((it + 1)->offset) : (max_size)) - aligned_end_of_buffer; + + // If the gap is large enough to hold the new buffer, insert the new buffer there + if (gap >= new_buffer_size) { + return aligned_end_of_buffer; + } + } + + // If no suitable gap was found, add the new buffer to the end of the list (but aligned to page size). + return aligned_max_size; +} + +std::vector InternalBufferPlanner::build_availibility_map( + const std::vector &context_buffer_usage_vector, uint16_t start_context, uint16_t end_context) +{ + // Start with empty event vector + std::vector unified_buffer_events = {}; + for (size_t context_index = start_context; context_index <= end_context; context_index++) { + unified_buffer_events = merge_context_buffer_events(unified_buffer_events, context_buffer_usage_vector[context_index]); + } + + return unified_buffer_events; +} + +void update_buffer_to_context_map(std::vector> &context_buffer_usage_vector, + uint16_t start_context, uint16_t end_context, size_t buffer_offset, size_t buffer_size) +{ + // Don't have to sort here. Only the combined vector needs to be sorted. + for (uint16_t context_index = start_context; context_index <= end_context; context_index++) { + context_buffer_usage_vector[context_index].emplace_back(BufferUsageSegment{buffer_offset, buffer_size}); + } +} + +hailo_status InternalBufferPlanner::add_edge_layer_to_planning( + const std::pair &edge_layer, + std::vector> &context_buffer_usage_vector, BufferPlan &buffer_plan, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + const auto buffer_requirements = return_buffer_requirements(edge_layer.second, buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + // Check if there is enough space in the current context buffer. 
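+ // [Editor's note - worked example, values assumed] With DIV_ROUND_DOWN/DIV_ROUND_UP as the usual
+ // integer rounding helpers, suppose segments {offset 0, size 4096} and {offset 8192, size 4096}
+ // are already in use across these contexts and a new 2048-byte edge layer needs a 512-byte
+ // alignment: the aligned end of the first segment is 4096, the gap up to 8192 is 4096 >= 2048,
+ // so the call to find_new_buffer_offset() below would return 4096 and no new memory is appended.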
+ const auto start_context = edge_layer.second.start_context; + const auto end_context = edge_layer.second.end_context; + const auto buffer_map = build_availibility_map(context_buffer_usage_vector, start_context, end_context); + + const auto edge_layer_size = buffer_requirements->buffer_size(); + const auto buffer_offset_alignment = buffer_requirements->desc_page_size(); + const auto buffer_offset = find_new_buffer_offset(buffer_map, edge_layer_size, buffer_offset_alignment); + + auto end_of_edge_layer_offset = buffer_offset + edge_layer_size; + // Update the buffer size if needed + buffer_plan.buffer_size = std::max(end_of_edge_layer_offset, buffer_plan.buffer_size); + // Update the total edge layer size + buffer_plan.total_edge_layer_size += edge_layer_size; + + // Add the buffer to the buffer plan + buffer_plan.edge_layer_offsets.emplace_back(edge_layer.first, buffer_offset); + buffer_plan.edge_layer_infos.emplace(edge_layer.first, edge_layer.second); + + update_buffer_to_context_map(context_buffer_usage_vector, start_context, end_context, buffer_offset, edge_layer_size); + + LOGGER__DEBUG("Added edge layer key {}:{} with size {} from context {} to context {} to offset {}", + edge_layer.first.first, edge_layer.first.second, edge_layer_size, start_context, end_context, buffer_offset); + + return HAILO_SUCCESS; +} + +Expected InternalBufferPlanner::create_single_buffer_planning( + const std::map &sg_edge_layers, size_t number_of_contexts, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size) +{ + InternalBufferPlanning buffer_planning; + // Trying to reserve one buffer only. + buffer_planning.reserve(1); + // Allocate a plan for one buffer + BufferPlan buffer_plan; + // Set the requested buffer type (SG or CCB) + buffer_plan.buffer_type = buffer_type; + // Init the buffer with size 0 + buffer_plan.buffer_size = 0; + buffer_plan.total_edge_layer_size = 0; + + auto sorted_edge_layer_vector = sort_edge_layers_by_size(sg_edge_layers); + std::vector> context_buffer_usage_vector(number_of_contexts); + + for (auto &edge_layer : sorted_edge_layer_vector) { + auto status = add_edge_layer_to_planning(edge_layer, context_buffer_usage_vector, buffer_plan, buffer_type, max_page_size); + CHECK_STATUS_CANT_MEET_REQUIREMENTS(status); + } + + // Update the buffer planning + buffer_planning.emplace_back(buffer_plan); + + return buffer_planning; +} + +Expected InternalBufferPlanner::create_optimized_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, size_t number_of_contexts, bool force_sg_buffer_type) +{ + std::map ccb_edge_layers; + std::map sg_edge_layers; + + // First - split between CCB and SG buffers + for (const auto &edge_layer_info : edge_layer_infos) { + if (should_edge_layer_use_ccb(edge_layer_info.second.type, dma_type, force_sg_buffer_type)) { + ccb_edge_layers.emplace(edge_layer_info.first, edge_layer_info.second); + } else { + sg_edge_layers.emplace(edge_layer_info.first, edge_layer_info.second); + } + } + + InternalBufferPlanning buffer_planning; + // Second - create buffer planning for each buffer type + if (!ccb_edge_layers.empty()) { + auto ccb_buffer_planning = + create_single_buffer_planning(ccb_edge_layers, number_of_contexts, vdma::VdmaBuffer::Type::CONTINUOUS, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(ccb_buffer_planning); + buffer_planning.insert(buffer_planning.end(), ccb_buffer_planning->begin(), ccb_buffer_planning->end()); + }
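+ // [Editor's note - hedged explanation] Packing each buffer type into a single buffer lets edge
+ // layers whose context lifetimes do not overlap share bytes: add_edge_layer_to_planning() only
+ // consults the availability map for contexts [start_context, end_context], so, for example, an
+ // edge layer alive in contexts [0, 2] and another alive in [3, 5] may be placed at the same offset.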
+ + if (!sg_edge_layers.empty()) { + auto sg_buffer_planning = + create_single_buffer_planning(sg_edge_layers, number_of_contexts, vdma::VdmaBuffer::Type::SCATTER_GATHER, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(sg_buffer_planning); + buffer_planning.insert(buffer_planning.end(), sg_buffer_planning->begin(), sg_buffer_planning->end()); + } + + return buffer_planning; +} + +Expected InternalBufferPlanner::create_buffer_planning( + const std::map &edge_layer_infos, Type plan_type, + HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts) +{ + static const bool FORCE_SG_BUFFER_TYPE = true; + // Force the plan by user flag + if (nullptr != std::getenv("HAILO_FORCE_NAIVE_PER_BUFFER_TYPE_ALOCATION")) { + LOGGER__INFO("Forced buffer planning of type 'NAIVE_PER_BUFFER_TYPE'."); + plan_type = Type::NAIVE_PER_BUFFER_TYPE; + } + + switch (plan_type) { + case Type::SINGLE_BUFFER_PER_BUFFER_TYPE: + return create_optimized_buffer_planning(edge_layer_infos, dma_type, max_page_size, number_of_contexts); + case Type::SINGLE_SG_BUFFER: + return create_optimized_buffer_planning(edge_layer_infos, dma_type, max_page_size, number_of_contexts, FORCE_SG_BUFFER_TYPE); + case Type::NAIVE_PER_BUFFER_TYPE: + return create_naive_buffer_planning(edge_layer_infos, dma_type, max_page_size); + case Type::NAIVE_SG_BUFFER: + return create_naive_buffer_planning(edge_layer_infos, dma_type, max_page_size, FORCE_SG_BUFFER_TYPE); + default: + return make_unexpected(HAILO_INVALID_ARGUMENT); + } +} + +BufferPlanReport InternalBufferPlanner::report_planning_info(const InternalBufferPlanning &buffer_planning) +{ + BufferPlanReport report = {}; + report.cma_memory = 0; + report.user_memory = 0; + report.edge_layer_size = 0; + + for (const auto &buffer_plan : buffer_planning) { + if (vdma::VdmaBuffer::Type::CONTINUOUS == buffer_plan.buffer_type) { + report.cma_memory += buffer_plan.buffer_size; + } else { + report.user_memory += buffer_plan.buffer_size; + } + report.edge_layer_size += buffer_plan.total_edge_layer_size; + } + + report.memory_utilization_factor = (report.edge_layer_size > 0) ? + (static_cast(report.cma_memory + report.user_memory) / static_cast(report.edge_layer_size)) : 1; + + return report; +}
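+// [Editor's note - worked example, values assumed] With one CONTINUOUS plan of 2 MiB and one
+// SCATTER_GATHER plan of 6 MiB serving 10 MiB worth of edge layers, the report holds
+// cma_memory = 2 MiB, user_memory = 6 MiB and memory_utilization_factor = (2 + 6) / 10 = 0.8.
+// A factor below 1 means lifetime-based reuse packed the edge layers into less memory than their
+// combined size; the naive planners always come out at exactly 1.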
+ +Expected InternalBufferPlanner::get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key) +{ + for (const auto &buffer_plan : buffer_planning) { + auto it = buffer_plan.edge_layer_infos.find(edge_layer_key); + if (it != buffer_plan.edge_layer_infos.end()) { + return Expected(it->second); + } + } + return make_unexpected(HAILO_NOT_FOUND); +} + +hailo_status InternalBufferPlanner::change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key, size_t new_offset, uint16_t max_page_size) +{ + TRY(auto edge_layer_info, get_edge_info_from_buffer_plan(buffer_planning, edge_layer_key)); + for (auto &buffer_plan : buffer_planning) { + const auto buffer_requirements = return_buffer_requirements(edge_layer_info, buffer_plan.buffer_type, max_page_size); + CHECK_EXPECTED_CANT_MEET_REQUIREMENTS(buffer_requirements); + + for (auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { + if (edge_layer_offset.first == edge_layer_key) { + edge_layer_offset.second = new_offset; + if (edge_layer_offset.second + buffer_requirements->buffer_size() > buffer_plan.buffer_size) { + buffer_plan.buffer_size = edge_layer_offset.second + buffer_requirements->buffer_size(); + } + return HAILO_SUCCESS; + } + } + } + return HAILO_INVALID_ARGUMENT; +} + +Expected InternalBufferPlanner::get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key) +{ + for (auto &buffer_plan : buffer_planning) { + auto it = buffer_plan.edge_layer_offsets.begin(); + while (it != buffer_plan.edge_layer_offsets.end()) { + if (it->first == edge_layer_key) { + return Expected(it->second); + } + it++; + } + } + return make_unexpected(HAILO_NOT_FOUND); +} + + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp new file mode 100644 index 00000000..2e1a6508 --- /dev/null +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp @@ -0,0 +1,135 @@ +/** + * Copyright (c) 2020-2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file internal_buffer_planner.hpp + * @brief Planner for all the internal buffers of the CoreOp + * + * The planner holds the algorithms that plan the connections between buffers and edge layers.
+ * + **/ + +#ifndef _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ +#define _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ + +#include "hailo/hef.hpp" +#include "common/utils.hpp" +#include "hef/layer_info.hpp" +#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/buffer_requirements.hpp" + +namespace hailort +{ + +using EdgeLayerKey = std::pair; + +struct EdgeLayerInfo { + LayerType type; + uint32_t transfer_size; + uint16_t max_transfers_in_batch; + uint16_t start_context; + uint16_t end_context; + bool reuse_buffer; +}; + +struct EdgeLayerToBufferMap { + std::shared_ptr buffer; + size_t offset; +}; + +struct BufferPlan { + vdma::VdmaBuffer::Type buffer_type; + size_t buffer_size; + size_t total_edge_layer_size; + std::vector> edge_layer_offsets; + std::map edge_layer_infos; +}; + +struct BufferPlanReport { + size_t cma_memory; + size_t user_memory; + size_t edge_layer_size; + float memory_utilization_factor; +}; + +using InternalBufferPlanning = std::vector; + + +// BufferUsageSegment is a struct that represents a segment of a buffer that is used in a specific context +typedef struct { + size_t offset; + size_t size; +} BufferUsageSegment; + +// ContextBufferUsageSegments represents all buffer segments that is used in a specific context +using ContextBufferUsageSegments = std::vector; + +class InternalBufferPlanner final +{ +public: + + enum class Type { + SINGLE_BUFFER_PER_BUFFER_TYPE = 0, + SINGLE_SG_BUFFER, + NAIVE_PER_BUFFER_TYPE, + NAIVE_SG_BUFFER, + + // Must be last + INVALID, + }; + + // Planning functions + static Expected create_buffer_planning( + const std::map &edge_layer_infos, Type plan_type, + HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts); + static Expected create_naive_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, bool force_sg_type_buffer = false); + static Expected create_optimized_buffer_planning( + const std::map &edge_layer_infos, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size, size_t number_of_contexts, bool force_sg_type_buffer = false); + // Reporting functions + static BufferPlanReport report_planning_info(const InternalBufferPlanning &buffer_planning); + + // Debug API + static hailo_status change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, const EdgeLayerKey &edge_layer_key, + size_t new_offset, uint16_t max_page_size); + static Expected get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key); + +private: + + // Helper functions + static bool should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, + bool force_sg_type_buffer); + static std::vector> sort_edge_layers_by_size( + const std::map &edge_layers); + static Expected return_buffer_requirements( + const EdgeLayerInfo &edge_layer, const vdma::VdmaBuffer::Type buffer_type, + uint16_t max_page_size); + static Expected get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, + const EdgeLayerKey &edge_layer_key); + + // Planning phase functions + static ContextBufferUsageSegments merge_context_buffer_events( + ContextBufferUsageSegments& combined, const ContextBufferUsageSegments& added_buffers); + static size_t find_new_buffer_offset(const ContextBufferUsageSegments& unified_buffers, size_t new_buffer_size, + uint16_t buffer_offset_alignment); + static std::vector build_availibility_map( + const std::vector &context_buffer_usage_vector, uint16_t start_context, uint16_t 
end_context); + static hailo_status add_edge_layer_to_planning(const std::pair &edge_layer, + std::vector> &context_buffer_usage_vector, BufferPlan &buffer_plan, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size); + + + static Expected create_single_buffer_planning( + const std::map &sg_edge_layers, size_t number_of_contexts, + const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_INTERNAL_BUFFER_PLANNER_HPP_ */ diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp index 29846f93..b35878b6 100644 --- a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp @@ -8,7 +8,7 @@ **/ #include "periph_calculator.hpp" -#include "device_common/device_internal.hpp" +#include "hef/hef_internal.hpp" namespace hailort { @@ -70,7 +70,7 @@ uint32_t PeriphCalculator::calculate_ddr_periph_buffers_per_frame(const LayerInf Expected PeriphCalculator::calculate_periph_registers_impl(const LayerInfo &layer_info, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, const bool is_core_hw_padding_config_in_dfc, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { // Calculate periph according to the hw shape - the shape the core is expecting to get const hailo_3d_image_shape_t& periph_shape = layer_info.hw_shape; @@ -84,18 +84,22 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye const auto row_size = static_cast(periph_shape.width * periph_shape.features * layer_info.hw_data_bytes); auto periph_frame_size = periph_shape.height * row_size; - // In case of core hw padding in DFC extension - hw shape might not be aligned - use aligned frame size and + CHECK_AS_EXPECTED(desc_page_size < layer_info.max_shmifo_size, HAILO_INVALID_ARGUMENT, + "Cannot find possible periph buffer size solution since desc_page_size ({}) is equal or larger than max stream size ({}) for layer name {}", + desc_page_size, layer_info.max_shmifo_size, layer_info.name); + + // In case of core hw padding in DFC extension - hw shape might not be aligned - use aligned frame size and + // configured periph registers will add / remove the extra padding if (is_core_hw_padding_config_in_dfc) { if (0 != (periph_frame_size % PERIPH_FRAME_ALIGNMENT)) { auto max_periph_padding_payload = HefConfigurator::max_periph_padding_payload_value( DeviceBase::hef_arch_to_device_arch(hw_arch)); CHECK_EXPECTED(max_periph_padding_payload); - + // Currently the case of a payload larger than the max periph padding payload value is not supported CHECK_AS_EXPECTED(max_periph_padding_payload.value() > periph_frame_size, HAILO_INVALID_HEF, "Error, padded frame size larger than {}. Currently not supported", max_periph_padding_payload.value()); - + const auto padded_periph_frame_size = HailoRTCommon::align_to(periph_frame_size, static_cast(PERIPH_FRAME_ALIGNMENT)); // Configure periph padding registers @@ -116,7 +120,7 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye } CHECK_AS_EXPECTED(0 != periph_bytes_per_buffer, HAILO_INVALID_ARGUMENT, "Error, Could not find valid periph bytes per buffer value"); - + // In ddr - the core makes sure that row size is aligned to PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE but if a row + // Is too large to fit in core bytes per buffer - they will divide it and put it in multiple buffers - so in order to + // Get the exact size in
periph buffers per frame - we must multiply core registers and divide by periph bytes per buffer @@ -131,7 +135,7 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye } Expected PeriphCalculator::calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc) { auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch)); diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp index bfa487bb..5ca09112 100644 --- a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp @@ -13,7 +13,8 @@ #include "common/utils.hpp" #include "hailo/hailort_common.hpp" -#include "hef/hef_internal.hpp" +#include "hef/layer_info.hpp" +#include "device_common/device_internal.hpp" namespace hailort { @@ -23,7 +24,7 @@ static const uint64_t PERIPH_FRAME_ALIGNMENT = 8; class PeriphCalculator { public: static Expected calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const ProtoHEFHwArch &hw_arch, + const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc); private: static bool is_valid_periph_bytes_value(const uint32_t periph_bytes_per_buffer, const uint32_t hw_frame_size, @@ -32,7 +33,7 @@ class PeriphCalculator { static Expected calculate_nms_periph_registers(const LayerInfo &layer_info); static Expected calculate_periph_registers_impl(const LayerInfo &layer_info, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, - const bool is_core_hw_padding_config_in_dfc, const ProtoHEFHwArch &hw_arch); + const bool is_core_hw_padding_config_in_dfc, const HEFHwArch &hw_arch); static uint32_t calculate_ddr_periph_buffers_per_frame(const LayerInfo &layer_info, const uint32_t periph_bytes_per_buffer); diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp index b1b74457..ab1aaecf 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp @@ -4,20 +4,22 @@ #include "vdma/channel/boundary_channel.hpp" #include "vdma/memory/buffer_requirements.hpp" #include "device_common/control.hpp" +#include "core_op/resource_manager/internal_buffer_manager.hpp" #include #define HAILO15H_NMS_MAX_CLASSES (1024) +#define MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER (64) namespace hailort { Expected ContextResources::create(HailoRTDriver &driver, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, const std::vector &config_channels_ids, - const ConfigBufferInfoMap &config_buffer_infos) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, + const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, + std::shared_ptr internal_buffer_manager) { CHECK_AS_EXPECTED(context_type < CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_COUNT, HAILO_INVALID_ARGUMENT); - CHECK_AS_EXPECTED(config_buffer_infos.size() 
<= config_channels_ids.size(), HAILO_INTERNAL_FAILURE, "config_buffer_infos size ({}) is bigger than config_channels_id count ({})", config_buffer_infos.size(), config_channels_ids.size()); @@ -29,19 +31,11 @@ Expected ContextResources::create(HailoRTDriver &driver, config_buffer_infos.at(config_stream_index)); CHECK_EXPECTED(buffer_resource); config_buffers.emplace_back(buffer_resource.release()); - } - - return ContextResources(driver, context_type, std::move(config_buffers)); -} -const std::vector &ContextResources::get_controls() const -{ - return m_builder.get_controls(); -} + internal_buffer_manager->add_config_buffer_info(context_index, config_stream_index, config_buffer_infos.at(config_stream_index)); + } -ContextSwitchBufferBuilder &ContextResources::builder() -{ - return m_builder; + return ContextResources(driver, context_type, std::move(config_buffers), internal_buffer_manager); } hailo_status ContextResources::add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, @@ -145,7 +139,7 @@ hailo_status ContextResources::validate_edge_layer(const LayerInfo &layer_info, // In Activation Context it is ok to have multiple edge layers with same stream index seeing as they could be for // Different contexts etc... - if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != m_builder.get_context_type()) { + if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION != get_context_type()) { if (edge_layer.layer_info.stream_index == layer_info.stream_index) { // Validate that the amount of edge layers with the same stream index per context is 2 (And with opposite directions) // In the case of dual direction supported feature - otherwise 1 @@ -185,14 +179,18 @@ static Expected create_hw_latency_meter(const std::vector 1) { - LOGGER__WARNING("HW Latency measurement is supported on networks with a single input"); + LOGGER__WARNING("HW Latency measurement is supported on networks with a single input. 
the model has {} physical inputs.", + h2d_streams_count); return make_unexpected(HAILO_INVALID_OPERATION); } @@ -242,13 +240,20 @@ Expected ResourcesManager::create(VdmaDevice &vdma_device, Hai config_channels_ids.push_back(channel_id.release()); } + auto internal_buffer_manager = InternalBufferManager::create(driver, config_params); + CHECK_EXPECTED(internal_buffer_manager); + + auto action_list_buffer_builder = create_action_list_buffer_builder(core_op_metadata->dynamic_contexts().size(), + driver); + CHECK_EXPECTED(action_list_buffer_builder); + auto network_index_map = core_op_metadata->get_network_names(); auto latency_meters = create_latency_meters_from_config_params(config_params, core_op_metadata); CHECK_EXPECTED(latency_meters); ResourcesManager resources_manager(vdma_device, driver, std::move(allocator), config_params, - std::move(core_op_metadata), core_op_index, - std::move(network_index_map), latency_meters.release(), std::move(config_channels_ids)); + std::move(core_op_metadata), core_op_index, std::move(network_index_map), latency_meters.release(), + std::move(config_channels_ids), internal_buffer_manager.release(), action_list_buffer_builder.release()); return resources_manager; } @@ -258,7 +263,9 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive std::shared_ptr &&core_op_metadata, uint8_t core_op_index, const std::vector &&network_index_map, LatencyMetersMap &&latency_meters, - std::vector &&config_channels_ids) : + std::vector &&config_channels_ids, + std::shared_ptr internal_buffer_manager, + std::shared_ptr &&action_list_buffer_builder) : m_contexts_resources(), m_channel_allocator(std::move(channel_allocator)), m_vdma_device(vdma_device), @@ -273,8 +280,11 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive m_latency_meters(std::move(latency_meters)), m_boundary_channels(), m_is_configured(false), + m_is_activated(false), m_config_channels_ids(std::move(config_channels_ids)), - m_hw_only_boundary_buffers() + m_hw_only_boundary_buffers(), + m_internal_buffer_manager(std::move(internal_buffer_manager)), + m_action_list_buffer_builder(std::move(action_list_buffer_builder)) {} ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : @@ -286,19 +296,24 @@ ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : m_intermediate_buffers(std::move(other.m_intermediate_buffers)), m_core_op_metadata(std::move(other.m_core_op_metadata)), m_core_op_index(other.m_core_op_index), - m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast(0))), - m_total_context_count(std::exchange(other.m_total_context_count, static_cast(0))), + m_dynamic_context_count(std::exchange(other.m_dynamic_context_count, static_cast(0))), + m_total_context_count(std::exchange(other.m_total_context_count, static_cast(0))), m_network_index_map(std::move(other.m_network_index_map)), m_latency_meters(std::move(other.m_latency_meters)), m_boundary_channels(std::move(other.m_boundary_channels)), m_is_configured(std::exchange(other.m_is_configured, false)), + m_is_activated(std::exchange(other.m_is_activated, false)), m_config_channels_ids(std::move(other.m_config_channels_ids)), - m_hw_only_boundary_buffers(std::move(other.m_hw_only_boundary_buffers)) + m_hw_only_boundary_buffers(std::move(other.m_hw_only_boundary_buffers)), + m_internal_buffer_manager(std::move(other.m_internal_buffer_manager)), + m_action_list_buffer_builder(std::move(other.m_action_list_buffer_builder)) {} hailo_status 
ResourcesManager::fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header) { app_header.infer_features.preliminary_run_asap = m_core_op_metadata->supported_features().preliminary_run_asap; + app_header.infer_features.batch_register_config = m_core_op_metadata->supported_features().batch_register_config; + app_header.infer_features.can_fast_batch_switch = m_core_op_metadata->get_can_fast_batch_switch(); return HAILO_SUCCESS; } @@ -343,7 +358,7 @@ hailo_status ResourcesManager::fill_network_batch_size(CONTROL_PROTOCOL__applica hailo_status ResourcesManager::fill_csm_buffer_size(CONTROL_PROTOCOL__application_header_t &app_header) { // All config buffers on the same platform will have the same desc_page_size - because it is derived from the host - app_header.csm_buffer_size = std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_DESC_PAGE_SIZE); + app_header.csm_buffer_size = std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_SG_PAGE_SIZE); return HAILO_SUCCESS; } @@ -364,6 +379,11 @@ void ResourcesManager::process_interrupts(IrqData &&irq_data) continue; } + if (!channel_irq_data.validation_success) { + LOGGER__CRITICAL("Got validation error on channel {}", channel_irq_data.channel_id); + continue; + } + if (!channel_irq_data.is_active) { LOGGER__CRITICAL("Channel {} was aborted by external source", channel_irq_data.channel_id); continue; @@ -371,7 +391,7 @@ void ResourcesManager::process_interrupts(IrqData &&irq_data) auto status = boundary_channel->second->trigger_channel_completion(channel_irq_data.desc_num_processed); if ((status != HAILO_SUCCESS) && - (status != HAILO_STREAM_ABORTED_BY_USER) && + (status != HAILO_STREAM_ABORT) && (status != HAILO_STREAM_NOT_ACTIVATED)) { // Log error and continue gracefully to process other interrupts LOGGER__ERROR("Trigger channel completion failed on channel {} with status {}", channel_irq_data.channel_id, status); @@ -449,16 +469,21 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay // TODO - remove this WA after HRT-11747 const uint16_t max_page_size = (m_driver.desc_max_page_size() == layer_info.max_shmifo_size) ? (m_driver.desc_max_page_size() / 2) : m_driver.desc_max_page_size(); - auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_sg_buffer_requirements_single_transfer( - max_page_size, static_cast(min_active_trans), static_cast(max_active_trans), - transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + auto buffer_sizes_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::SCATTER_GATHER, max_page_size, static_cast(min_active_trans), + static_cast(max_active_trans), transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, + DONT_FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_sizes_requirements.status()) { + LOGGER__ERROR("Network shapes and batch size exceeds driver descriptors capabilities. " + "(A common cause for this error could be the batch size - which is {}).", network_batch_size.value()); + } CHECK_EXPECTED_AS_STATUS(buffer_sizes_requirements); const auto page_size = buffer_sizes_requirements->desc_page_size(); const auto descs_count = (nullptr != std::getenv("HAILO_CONFIGURE_FOR_HW_INFER")) ? 
- MAX_DESCS_COUNT : buffer_sizes_requirements->descs_count(); + MAX_SG_DESCS_COUNT : buffer_sizes_requirements->descs_count(); - auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_vdma_device, descs_count, + auto channel = vdma::BoundaryChannel::create(channel_id.value(), channel_direction, m_driver, descs_count, page_size, layer_info.name, latency_meter); CHECK_EXPECTED_AS_STATUS(channel); @@ -493,16 +518,19 @@ hailo_power_mode_t ResourcesManager::get_power_mode() const return m_config_params.power_mode; } -ExpectedRef ResourcesManager::create_intermediate_buffer(uint32_t transfer_size, - uint16_t batch_size, uint8_t src_stream_index, uint8_t src_context_index, +ExpectedRef ResourcesManager::create_intermediate_buffer( + uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type) { - auto buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, - streaming_type); - CHECK_EXPECTED(buffer); + auto edge_layer_key = std::make_pair(src_context_index, src_stream_index); + TRY(auto buffer_info, m_internal_buffer_manager->get_intermediate_buffer(edge_layer_key)); + + auto intermediate_buffer = IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, + streaming_type, std::move(buffer_info.buffer), buffer_info.offset); + CHECK_EXPECTED(intermediate_buffer); const auto key = std::make_pair(src_context_index, src_stream_index); - auto emplace_res = m_intermediate_buffers.emplace(key, buffer.release()); + auto emplace_res = m_intermediate_buffers.emplace(key, intermediate_buffer.release()); return std::ref(emplace_res.first->second); } @@ -530,20 +558,27 @@ Expected ResourcesManager::get_control_c status = fill_csm_buffer_size(app_header); CHECK_SUCCESS_AS_EXPECTED(status, "Invalid csm buffer size"); + const auto mapped_addr = get_action_list_buffer_builder()->get_mapped_buffer_dma_address(); + CHECK(IS_FIT_IN_UINT32(mapped_addr), HAILO_INVALID_ARGUMENT, "Invalid Mapped Address {} must fit in uint32", + mapped_addr); + app_header.external_action_list_address = static_cast(mapped_addr); + return app_header; } -Expected> ResourcesManager::add_new_context(CONTROL_PROTOCOL__context_switch_context_type_t type, +Expected> ResourcesManager::add_new_context( + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, const ConfigBufferInfoMap &config_info) { - CHECK_AS_EXPECTED(m_total_context_count < std::numeric_limits::max(), HAILO_INVALID_CONTEXT_COUNT); + CHECK_AS_EXPECTED(m_total_context_count < std::numeric_limits::max(), HAILO_INVALID_CONTEXT_COUNT); - auto context_resources = ContextResources::create(m_driver, type, m_config_channels_ids, config_info); + auto context_resources = ContextResources::create(m_driver, context_type, context_index, + m_config_channels_ids, config_info, m_internal_buffer_manager); CHECK_EXPECTED(context_resources); m_contexts_resources.emplace_back(context_resources.release()); m_total_context_count++; - if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC == type) { + if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC == context_type) { m_dynamic_context_count++; } @@ -609,8 +644,11 @@ hailo_status ResourcesManager::configure() auto status = Control::context_switch_set_network_group_header(m_vdma_device, core_op_header.release()); CHECK_SUCCESS(status); - for (const auto &context : m_contexts_resources) { - status = 
Control::context_switch_set_context_info(m_vdma_device, context.get_controls()); + // Only send controls to FW in case of control action list builder + if (ActionListBufferBuilder::Type::CONTROL == get_action_list_buffer_builder()->get_builder_type()) { + const auto control_action_list = std::static_pointer_cast( + get_action_list_buffer_builder()); + status = Control::context_switch_set_context_info(m_vdma_device, control_action_list->get_controls()); CHECK_SUCCESS(status); } @@ -619,11 +657,21 @@ hailo_status ResourcesManager::configure() hailo_status ResourcesManager::enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count) { - return Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count); + CHECK_SUCCESS(Control::enable_core_op(m_vdma_device, m_core_op_index, dynamic_batch_size, batch_count)); + // Enable over enable is possible (batch switch in the same NG), so there is no need to verify the state. + set_is_activated(true); + + return HAILO_SUCCESS; } hailo_status ResourcesManager::reset_state_machine() { + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + set_is_activated(false); + auto status = Control::reset_context_switch_state_machine(m_vdma_device); CHECK_SUCCESS(status); @@ -639,6 +687,8 @@ hailo_status ResourcesManager::reset_state_machine() hailo_status ResourcesManager::start_vdma_interrupts_dispatcher() { + CHECK(get_is_activated(), HAILO_INTERNAL_FAILURE, "Cannot call start_vdma_interrupts_dispatcher when core-op already deactivated"); + auto interrupts_dispatcher = m_vdma_device.get_vdma_interrupts_dispatcher(); CHECK_EXPECTED_AS_STATUS(interrupts_dispatcher); @@ -656,9 +706,31 @@ hailo_status ResourcesManager::start_vdma_interrupts_dispatcher() hailo_status ResourcesManager::stop_vdma_interrupts_dispatcher() { - auto interrupts_dispatcher = m_vdma_device.get_vdma_interrupts_dispatcher(); - CHECK_EXPECTED_AS_STATUS(interrupts_dispatcher); - return interrupts_dispatcher->get().stop(); + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + TRY(auto interrupts_dispatcher, m_vdma_device.get_vdma_interrupts_dispatcher()); + return interrupts_dispatcher.get().stop(); +} + +hailo_status ResourcesManager::start_vdma_transfer_launcher() +{ + CHECK(get_is_activated(), HAILO_INTERNAL_FAILURE, "Cannot call start_vdma_transfer_launcher when core-op already deactivated"); + TRY(auto vdma_transfer_launcher, m_vdma_device.get_vdma_transfer_launcher()); + vdma_transfer_launcher.get().start(); + return HAILO_SUCCESS; +} + +hailo_status ResourcesManager::stop_vdma_transfer_launcher() +{ + if (!get_is_activated()) { + return HAILO_SUCCESS; + } + + TRY(auto vdma_transfer_launcher, m_vdma_device.get_vdma_transfer_launcher()); + vdma_transfer_launcher.get().stop(); + return HAILO_SUCCESS; } Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_ptr desc_list, @@ -668,7 +740,7 @@ Expected ResourcesManager::program_desc_for_hw_only_flow(std::shared_p for (uint16_t batch_index = 0; batch_index < batch_count; batch_index++) { for (uint16_t transfer_index = 0; transfer_index < dynamic_batch_size; transfer_index++) { const auto last_desc_interrupts_domain = ((dynamic_batch_size - 1) == transfer_index) ? 
- vdma::InterruptsDomain::DEVICE : vdma::InterruptsDomain::NONE; + InterruptsDomain::DEVICE : InterruptsDomain::NONE; auto desc_count_local = desc_list->program_last_descriptor(single_transfer_size, last_desc_interrupts_domain, acc_desc_offset); CHECK_EXPECTED(desc_count_local, "Failed to program descs for inter context channels. Given max_batch_size is too big."); @@ -698,8 +770,9 @@ Expected> ResourcesManager::create_mapped_b CHECK_EXPECTED(mapped_buffer); m_hw_only_boundary_buffers.emplace_back(mapped_buffer.release()); - uint32_t STARTING_DESC = 0; - auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), boundary_channel_ptr->get_channel_id(), STARTING_DESC); + static const auto DEFAULT_BUFFER_OFFSET = 0; + auto status = desc_list->configure_to_use_buffer(*m_hw_only_boundary_buffers.back(), + m_hw_only_boundary_buffers.back()->size(), DEFAULT_BUFFER_OFFSET, boundary_channel_ptr->get_channel_id()); CHECK_SUCCESS_AS_EXPECTED(status); auto desc_programed = program_desc_for_hw_only_flow(desc_list, single_transfer_size, dynamic_batch_size, batch_count); @@ -842,4 +915,52 @@ Expected ResourcesManager::run_hw_only_infer() fw_infer_results.infer_cycles); } +hailo_status ResourcesManager::fill_internal_buffers_info() +{ + for (const auto &context_metadata : m_core_op_metadata->dynamic_contexts()) { + for (const auto &layer_info : context_metadata.get_ddr_output_layers()) { + auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); + CHECK_SUCCESS(status); + } + for (const auto &layer_info : context_metadata.get_inter_context_input_layers()) { + auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); + CHECK_SUCCESS(status); + } + } + + auto status = m_internal_buffer_manager->plan_and_execute(InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, + m_core_op_metadata->dynamic_contexts().size()); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +bool ResourcesManager::should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type) +{ + // Only allow env variable to affect in case of DmaType DRAM + if ((HailoRTDriver::DmaType::DRAM == dma_type) && ((MAX_NUM_CONTEXTS_FOR_CONTROL_BUILDER < num_contexts) + || (is_env_variable_on(DDR_ACTION_LIST_ENV_VAR, DDR_ACTION_LIST_ENV_VAR_VALUE, sizeof(DDR_ACTION_LIST_ENV_VAR_VALUE))))) { + return true; + } + return false; +} + +Expected> ResourcesManager::create_action_list_buffer_builder( + size_t num_dynamic_contexts, HailoRTDriver &driver) +{ + static const auto total_num_contexts = CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS + + num_dynamic_contexts; + + if (should_use_ddr_action_list(total_num_contexts, driver.dma_type())) { + auto ddr_action_list_buffer_builder = DDRActionListBufferBuilder::create(total_num_contexts, driver); + CHECK_EXPECTED(ddr_action_list_buffer_builder); + return std::static_pointer_cast(ddr_action_list_buffer_builder.release()); + } else { + auto control_action_list_buffer_builder = ControlActionListBufferBuilder::create(); + CHECK_EXPECTED(control_action_list_buffer_builder); + return std::static_pointer_cast(control_action_list_buffer_builder.release()); + } + +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp index ea8f0d14..9b892460 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp +++ 
b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp @@ -31,17 +31,16 @@ #include "core_op/resource_manager/intermediate_buffer.hpp" #include "core_op/resource_manager/config_buffer.hpp" #include "core_op/resource_manager/channel_allocator.hpp" -#include "core_op/resource_manager/context_switch_buffer_builder.hpp" +#include "core_op/resource_manager/action_list_buffer_builder/control_action_list_buffer_builder.hpp" +#include "core_op/resource_manager/action_list_buffer_builder/ddr_action_list_buffer_builder.hpp" #include "device_common/control_protocol.hpp" #include "vdma/channel/boundary_channel.hpp" #include "vdma/pcie/pcie_device.hpp" - +#include "internal_buffer_manager.hpp" namespace hailort { -#define DEFAULT_ACTUAL_BATCH_SIZE (1) - struct EdgeLayer { LayerInfo layer_info; @@ -84,11 +83,10 @@ struct DdrChannelsInfo class ContextResources final { public: - static Expected create(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type, - const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos); - - const std::vector &get_controls() const; - ContextSwitchBufferBuilder &builder(); + static Expected create(HailoRTDriver &driver, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, + const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, + std::shared_ptr internal_buffer_manager); hailo_status add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features); @@ -110,21 +108,26 @@ class ContextResources final { const SupportedFeatures &supported_features); std::vector &get_config_buffers(); + CONTROL_PROTOCOL__context_switch_context_type_t get_context_type() const { + return m_context_type; + } private: ContextResources(HailoRTDriver &driver, CONTROL_PROTOCOL__context_switch_context_type_t context_type, - std::vector &&config_buffers) : + std::vector &&config_buffers, std::shared_ptr internal_buffer_manager) : m_driver(std::ref(driver)), - m_builder(context_type), - m_config_buffers(std::move(config_buffers)) + m_context_type(context_type), + m_config_buffers(std::move(config_buffers)), + m_internal_buffer_manager(std::move(internal_buffer_manager)) {} std::reference_wrapper m_driver; - ContextSwitchBufferBuilder m_builder; + CONTROL_PROTOCOL__context_switch_context_type_t m_context_type; std::vector m_config_buffers; std::vector m_edge_layers; std::vector m_ddr_channels_infos; + std::shared_ptr m_internal_buffer_manager; }; class ResourcesManager final @@ -134,23 +137,24 @@ class ResourcesManager final const ConfigureNetworkParams &config_params, std::shared_ptr core_op_metadata, uint8_t core_op_index); - // TODO: HRT-9432 needs to call stop_vdma_interrupts_dispatcher and any other resource on dtor. + // TODO: HRT-9432 needs to call stop_vdma_interrupts_dispatcher and any other resource on dtor. 
~ResourcesManager() = default; ResourcesManager(const ResourcesManager &other) = delete; ResourcesManager &operator=(const ResourcesManager &other) = delete; ResourcesManager &operator=(ResourcesManager &&other) = delete; ResourcesManager(ResourcesManager &&other) noexcept; - ExpectedRef create_intermediate_buffer(uint32_t transfer_size, uint16_t batch_size, - uint8_t src_stream_index, uint8_t src_context_index, vdma::ChannelId d2h_channel_id, - IntermediateBuffer::StreamingType streaming_type); + ExpectedRef create_intermediate_buffer( + uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, + vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type); ExpectedRef get_intermediate_buffer(const IntermediateBufferKey &key); hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info); Expected get_control_core_op_header(); - Expected> add_new_context(CONTROL_PROTOCOL__context_switch_context_type_t type, - const ConfigBufferInfoMap &config_info={}); + Expected> add_new_context( + CONTROL_PROTOCOL__context_switch_context_type_t context_type, + const uint16_t context_index, const ConfigBufferInfoMap &config_info={}); const SupportedFeatures &get_supported_features() const { @@ -181,16 +185,23 @@ class ResourcesManager final return m_boundary_channels; } + std::shared_ptr& get_action_list_buffer_builder() + { + return m_action_list_buffer_builder; + } + Expected get_default_streams_interface(); Expected read_intermediate_buffer(const IntermediateBufferKey &key); hailo_status configure(); - hailo_status enable_state_machine(uint16_t dynamic_batch_size, + hailo_status enable_state_machine(uint16_t dynamic_batch_size, uint16_t batch_count = CONTROL_PROTOCOL__INIFINITE_BATCH_COUNT); hailo_status reset_state_machine(); hailo_status start_vdma_interrupts_dispatcher(); hailo_status stop_vdma_interrupts_dispatcher(); + hailo_status start_vdma_transfer_launcher(); + hailo_status stop_vdma_transfer_launcher(); Expected get_network_batch_size(const std::string &network_name) const; Expected get_boundary_vdma_channel_by_stream_name(const std::string &stream_name); Expected> get_boundary_vdma_channel_by_stream_name(const std::string &stream_name) const; @@ -207,6 +218,24 @@ class ResourcesManager final size_t single_frame_transfer_size, uint32_t infer_cycles); hailo_status set_hw_infer_done_notification(std::condition_variable &infer_done_cond); Expected run_hw_only_infer(); + hailo_status fill_internal_buffers_info(); + static bool should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type); + static Expected> create_action_list_buffer_builder( + size_t num_dynamic_contexts, HailoRTDriver &driver); + bool get_can_fast_batch_switch() + { + return m_core_op_metadata->get_can_fast_batch_switch(); + } + + void set_is_activated(bool is_activated) + { + m_is_activated = is_activated; + } + + bool get_is_activated() const + { + return m_is_activated; + } private: hailo_status fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header); @@ -224,24 +253,29 @@ class ResourcesManager final std::map m_intermediate_buffers; std::shared_ptr m_core_op_metadata; uint8_t m_core_op_index; - uint8_t m_dynamic_context_count; - uint8_t m_total_context_count; + uint16_t m_dynamic_context_count; + uint16_t m_total_context_count; const std::vector m_network_index_map; LatencyMetersMap m_latency_meters; // Latency meter per network // TODO: HRT-9429 - fast access to channel by id, using array, using engine_index and 
channel_index. std::map m_boundary_channels; bool m_is_configured; + bool m_is_activated; // Config channels ids are shared between all context. The following vector contains the channel id for each // config_stream_index. std::vector m_config_channels_ids; // Mapped buffers would be used only in hw only flow std::vector> m_hw_only_boundary_buffers; + std::shared_ptr m_internal_buffer_manager; + std::shared_ptr m_action_list_buffer_builder; ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &driver, ChannelAllocator &&channel_allocator, const ConfigureNetworkParams config_params, std::shared_ptr &&core_op_metadata, uint8_t core_op_index, const std::vector &&network_index_map, LatencyMetersMap &&latency_meters, - std::vector &&config_channels_ids); + std::vector &&config_channels_ids, + std::shared_ptr internal_buffer_manager, + std::shared_ptr &&action_list_buffer_builder); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp index 26d8a1bd..e86c141f 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp @@ -10,7 +10,7 @@ #include "resource_manager_builder.hpp" #include "device_common/control.hpp" #include "periph_calculator.hpp" - +#include "hef/hef_internal.hpp" namespace hailort { @@ -78,7 +78,7 @@ static Expected calculate_credit_params(const CONTROL_PROTOCOL__hw_co static Expected update_layer_info(const LayerInfo &original_layer_info, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, const bool should_optimize_credits, + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, const bool should_optimize_credits, const bool is_periph_calculated_in_hailort, const bool is_core_hw_padding_config_in_dfc) { LayerInfo local_layer_info = original_layer_info; @@ -104,7 +104,7 @@ static Expected update_layer_info(const LayerInfo &original_layer_inf static hailo_status fill_boundary_input_layer_impl(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); @@ -129,7 +129,7 @@ static hailo_status fill_boundary_input_layer_impl(ContextResources &context_res static hailo_status fill_boundary_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { if (layer_info.is_multi_planar) { for (auto &plane : layer_info.planes) { @@ -144,7 +144,7 @@ static hailo_status fill_boundary_input_layer(ContextResources &context_resource static hailo_status fill_inter_context_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = 
resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index); @@ -168,16 +168,16 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res inter_context_buffer.get_host_buffer_info(), resources_manager.get_supported_features()); CHECK_SUCCESS(status); - LOGGER__DEBUG("Intermediate input stream {}, src_context:{}, dst_context: {}, h2d_channel {}.", - layer_info.stream_index, layer_info.context_index, layer_info.connected_context_info.context_index, - channel_id.value()); + LOGGER__DEBUG("Intermediate edge key: {}:{} src_context:{}, dst_context: {}, h2d_channel {}.", + connected_context.context_index, connected_context.stream_index, + layer_info.connected_context_info.context_index, layer_info.context_index, channel_id.value()); return HAILO_SUCCESS; } static hailo_status fill_boundary_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, - const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); @@ -202,7 +202,7 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc static hailo_status fill_inter_context_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch, bool should_optimize_credits) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, bool should_optimize_credits) { const auto channel_id = resources_manager.get_available_channel_id(to_layer_identifier(layer_info), HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index); @@ -214,8 +214,8 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re CHECK_EXPECTED_AS_STATUS(network_batch_size); auto inter_context_buffer_exp = resources_manager.create_intermediate_buffer(frame_credits_in_bytes, - network_batch_size.value(), layer_info.stream_index, layer_info.context_index, channel_id.value(), - IntermediateBuffer::StreamingType::BURST); + network_batch_size.value(), layer_info.stream_index, layer_info.context_index, + channel_id.value(), IntermediateBuffer::StreamingType::BURST); CHECK_EXPECTED_AS_STATUS(inter_context_buffer_exp); auto &inter_context_buffer = inter_context_buffer_exp->get(); @@ -236,7 +236,7 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re static hailo_status fill_ddr_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) + const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch) { CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_INVALID_HEF, "Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers." 
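The header changes above expose the new action-list indirection: ContextResources no longer owns a ContextSwitchBufferBuilder; instead, every context funnels its serialized actions through one ActionListBufferBuilder held by the ResourcesManager, whose concrete backend is chosen once per core-op. The following is a minimal, self-contained sketch of that dispatch pattern; every name and signature in it is a simplified stand-in for illustration, not the real HailoRT declaration.

```cpp
// Illustrative sketch only -- simplified stand-ins, not the real HailoRT types.
#include <cstdio>
#include <memory>
#include <string>
#include <vector>

class ActionListBufferBuilder {
public:
    enum class Type { CONTROL, DDR };

    explicit ActionListBufferBuilder(Type type) : m_type(type) {}
    virtual ~ActionListBufferBuilder() = default;
    virtual void write_action(const std::string &serialized_action) = 0;
    Type get_builder_type() const { return m_type; }

private:
    const Type m_type;
};

// Backend 1: accumulate serialized actions into control messages for the FW.
class ControlActionListBufferBuilder final : public ActionListBufferBuilder {
public:
    ControlActionListBufferBuilder() : ActionListBufferBuilder(Type::CONTROL) {}
    void write_action(const std::string &serialized_action) override {
        m_controls.push_back(serialized_action); // the real builder also chunks by max control size
    }
    const std::vector<std::string> &get_controls() const { return m_controls; }

private:
    std::vector<std::string> m_controls;
};

// Backend 2: write actions directly into a DDR buffer that the FW reads itself.
class DDRActionListBufferBuilder final : public ActionListBufferBuilder {
public:
    DDRActionListBufferBuilder() : ActionListBufferBuilder(Type::DDR) {}
    void write_action(const std::string &serialized_action) override {
        m_ddr_buffer += serialized_action; // the real builder targets a mapped memory region
    }

private:
    std::string m_ddr_buffer;
};

// Pick the backend once, e.g. when the context count exceeds what controls can carry.
std::shared_ptr<ActionListBufferBuilder> create_builder(size_t num_contexts, size_t max_for_control) {
    if (num_contexts > max_for_control) {
        return std::make_shared<DDRActionListBufferBuilder>();
    }
    return std::make_shared<ControlActionListBufferBuilder>();
}

int main() {
    auto builder = create_builder(/*num_contexts=*/1200, /*max_for_control=*/64);
    builder->write_action("enable_lcu");
    // Only the control backend has accumulated messages that must be sent explicitly:
    if (ActionListBufferBuilder::Type::CONTROL == builder->get_builder_type()) {
        auto control = std::static_pointer_cast<ControlActionListBufferBuilder>(builder);
        std::printf("sending %zu control message(s) to FW\n", control->get_controls().size());
    }
    return 0;
}
```

This mirrors the flow in the diff: write_action_list() stays backend-agnostic, and only the final send step in ResourcesManager::configure() down-casts to the control backend to fetch its accumulated messages.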
@@ -299,7 +299,7 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, } static hailo_status fill_ddr_input_layer(ContextResources &context_resources, ResourcesManager &resources_manager, - const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const ProtoHEFHwArch &hw_arch) + const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch) { auto connected_stream_index = layer_info.connected_context_info.stream_index; auto ddr_info = context_resources.get_ddr_channels_info(connected_stream_index); @@ -355,7 +355,7 @@ static hailo_status add_ddr_buffers_info(std::vector &config_resources, const bool support_pre_fetch, std::vector &processed_configuration_actions) { assert(ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()); - const auto &write_ccw_action = *static_cast(configuration_action.get()); + auto &write_ccw_action = *static_cast(configuration_action.get()); const auto config_stream_index = write_ccw_action.config_stream_index(); assert(config_stream_index < config_resources.size()); - auto status = write_ccw_to_buffer(config_resources[config_stream_index], write_ccw_action, support_pre_fetch); + auto status = write_ccw_action.write_to_config_buffer(config_resources[config_stream_index], support_pre_fetch); CHECK_SUCCESS(status); status = push_fetch_config_actions(config_resources[config_stream_index], config_stream_index, @@ -589,13 +569,6 @@ static hailo_status proccess_write_ccw_action(const ContextSwitchConfigActionPtr return HAILO_SUCCESS; } -// TODO HRT-10073: change to supported features list -static bool is_hailo1x_device_type(const hailo_device_architecture_t dev_arch) -{ - // Compare with HAILO1X device archs - return (HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch) || (HAILO_ARCH_PLUTO == dev_arch); -} - static Expected find_dummy_stream(const LayerInfo &layer_info, const ContextResources &context_resources, const bool is_null_shmifo_supported) { @@ -610,9 +583,9 @@ static Expected find_dummy_stream(const LayerInfo &layer_info, const Co } } -static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch &hw_arch, +static hailo_status add_change_vdma_to_stream_mapping_impl(const HEFHwArch &hw_arch, const LayerInfo &layer_info, const ResourcesManager &resources_manager, - ContextResources &context_resources, uint8_t context_index, + ContextResources &context_resources, uint16_t context_index, std::vector &processed_configuration_actions) { auto vdma_channel = resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name); @@ -623,7 +596,7 @@ static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch uint8_t stream_index = layer_info.stream_index; if (is_dummy_stream) { auto dummy_stream_index = find_dummy_stream(layer_info, context_resources, - is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch))); + HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch))); CHECK_EXPECTED_AS_STATUS(dummy_stream_index); stream_index = *dummy_stream_index; } @@ -635,9 +608,9 @@ static hailo_status add_change_vdma_to_stream_mapping_impl(const ProtoHEFHwArch return HAILO_SUCCESS; } -static hailo_status add_change_vdma_to_stream_mapping(const ProtoHEFHwArch &hw_arch, +static hailo_status add_change_vdma_to_stream_mapping(const HEFHwArch &hw_arch, const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager, - ContextResources 
&context_resources, uint8_t context_index, + ContextResources &context_resources, uint16_t context_index, std::vector &processed_configuration_actions) { for (const auto &layer_info : core_op_metadata.get_all_layer_infos()) { @@ -726,7 +699,7 @@ static hailo_status push_edge_layer_activation_actions( return HAILO_SUCCESS; } -static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch &hw_arch, +static hailo_status proccess_trigger_new_data_input_action(const HEFHwArch &hw_arch, const ContextSwitchConfigActionPtr &configuration_action, uint32_t trigger_new_data_from_input_group_start, uint32_t trigger_new_data_from_input_group_end, @@ -734,7 +707,7 @@ static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch const CoreOpMetadata &core_op_metadata, const ResourcesManager &resources_manager, ContextResources &context_resources, - uint8_t context_index, + uint16_t context_index, std::vector &processed_configuration_actions, bool is_single_context) { const bool PUSH_ALL_EDGE_LAYERS = false; @@ -782,10 +755,9 @@ static hailo_status proccess_trigger_new_data_input_action(const ProtoHEFHwArch static hailo_status add_fetch_config_actions(std::vector &configuration_actions, std::vector &config_resources, bool support_pre_fetch) { - std::vector processed_configuration_actions; for (uint32_t action_index = 0; action_index < configuration_actions.size(); action_index++) { - const auto &configuration_action = configuration_actions[action_index]; + auto &configuration_action = configuration_actions[action_index]; if (ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()) { auto status = proccess_write_ccw_action(configuration_action, config_resources, support_pre_fetch, processed_configuration_actions); @@ -840,9 +812,9 @@ static hailo_status add_config_channel_activation_actions(std::vector &configuration_actions, const CoreOpMetadata &core_op_metadata, - const ResourcesManager &resources_manager, ContextResources &context_resources, uint8_t context_index, + const ResourcesManager &resources_manager, ContextResources &context_resources, uint16_t context_index, bool is_single_context) { const auto repeated_indexes = get_repreated_actions_boundary_indices(configuration_actions); @@ -915,15 +887,21 @@ static hailo_status handle_repeated_actions(std::vector &actions) +static hailo_status write_action_list(const ContextResources & context_resources, + std::shared_ptr &builder, const std::vector &actions) { + // Mark first action buffer of context to know when new context is starting (needed for dynamic contexts) + bool is_first_action_buffer_of_context = true; for (const auto &action : actions) { auto action_buffers = action->serialize(context_resources); CHECK_EXPECTED_AS_STATUS(action_buffers); for (auto &action_buffer : action_buffers.value()) { - builder.write_action(MemoryView(action_buffer)); + const bool last_action_buffer_in_context = (action_buffer == *(action_buffers.value().end() - 1)) && + (action == *(actions.end() - 1)); + builder->write_action(MemoryView(action_buffer), context_resources.get_context_type(), + is_first_action_buffer_of_context, last_action_buffer_in_context); + is_first_action_buffer_of_context = false; } } @@ -952,9 +930,9 @@ static hailo_status add_edge_layer_end_of_context_actions(const ContextResources return HAILO_SUCCESS; } -static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch &hw_arch, +static hailo_status fill_context_recipes_for_multi_context(const HEFHwArch &hw_arch, 
ContextResources &context_resources, ResourcesManager &resources_manager, - uint8_t context_index, const CoreOpMetadata &core_op_metadata, const ContextMetadata &context_metadata, + uint16_t context_index, const CoreOpMetadata &core_op_metadata, const ContextMetadata &context_metadata, bool is_single_context) { hailo_status status = HAILO_UNINITIALIZED; @@ -966,7 +944,7 @@ static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch // Parse context std::vector actions = context_metadata.get_actions(); - const auto support_pre_fetch = is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); + const auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); @@ -991,7 +969,7 @@ static hailo_status fill_context_recipes_for_multi_context(const ProtoHEFHwArch status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } static hailo_status create_boundary_channels(ResourcesManager &resources_manager, @@ -1013,7 +991,7 @@ static hailo_status create_boundary_channels(ResourcesManager &resources_manager static hailo_status fill_activation_config_recepies_for_multi_context( ContextResources &context_resources, ResourcesManager &resources_manager, - std::shared_ptr core_op_metadata, const ProtoHEFHwArch &hw_arch) + std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch) { auto hw_consts = Control::get_hw_consts(resources_manager.get_device()); CHECK_EXPECTED_AS_STATUS(hw_consts); @@ -1045,7 +1023,7 @@ static hailo_status fill_activation_config_recepies_for_multi_context( actions.emplace_back(action.release()); } - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } static Expected create_switch_lcu_batch_action(const ContextSwitchConfigActionPtr action, @@ -1057,8 +1035,9 @@ static Expected create_switch_lcu_batch_action(con uint32_t kernel_done_count = 0; CHECK_AS_EXPECTED((ContextSwitchConfigAction::Type::EnableLcuDefault == action->get_type()) || + (ContextSwitchConfigAction::Type::SwitchLcuBatch == action->get_type()) || (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()), HAILO_INVALID_ARGUMENT, - "Invalid action type - must be enable lcu (default or non default), Received type {}", action->get_type()); + "Invalid action type - must be enable lcu (default or non default) or switch lcu batch, Received type {}", action->get_type()); const auto params_buffer = action->serialize_params(context_resources); CHECK_EXPECTED(params_buffer); @@ -1069,19 +1048,25 @@ static Expected create_switch_lcu_batch_action(con lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); network_index = params->network_index; kernel_done_count = CONTEXT_SWITCH_DEFS__ENABLE_LCU_DEFAULT_KERNEL_COUNT; - } else { + } else if (ContextSwitchConfigAction::Type::EnableLcuNonDefault == action->get_type()) { const auto params = reinterpret_cast(params_buffer.value().data()); cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); lcu_index = 
CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); network_index = params->network_index; kernel_done_count = params->kernel_done_count; + } else if (ContextSwitchConfigAction::Type::SwitchLcuBatch == action->get_type()) { + const auto params = reinterpret_cast(params_buffer.value().data()); + cluster_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_CLUSTER_INDEX_READ(params->packed_lcu_id); + lcu_index = CONTEXT_SWITCH_DEFS__PACKED_LCU_ID_LCU_INDEX_READ(params->packed_lcu_id); + network_index = params->network_index; + kernel_done_count = params->kernel_done_count; } return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index, kernel_done_count); } static hailo_status fill_batch_switching_context_edge_layers(ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, ResourcesManager &resources_manager, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { auto hw_consts = Control::get_hw_consts(resources_manager.get_device()); CHECK_EXPECTED_AS_STATUS(hw_consts); @@ -1130,7 +1115,8 @@ static hailo_status add_lcu_actions_to_batch_switch_context(ContextResources &co // In the batch switch context static const std::set ENABLE_LCU_ACTIONS = { ContextSwitchConfigAction::Type::EnableLcuDefault, - ContextSwitchConfigAction::Type::EnableLcuNonDefault + ContextSwitchConfigAction::Type::EnableLcuNonDefault, + ContextSwitchConfigAction::Type::SwitchLcuBatch }; const auto lcu_batch_switch_actions = core_op_metadata.preliminary_context().get_actions_of_type(ENABLE_LCU_ACTIONS); @@ -1161,7 +1147,7 @@ static hailo_status create_change_boundary_input_batch_actions(const ContextReso } static hailo_status add_edge_layers_actions_to_batch_switch_context(ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, - ResourcesManager &resources_manager, const ProtoHEFHwArch &hw_arch, std::vector &actions) + ResourcesManager &resources_manager, const HEFHwArch &hw_arch, std::vector &actions) { auto status = fill_batch_switching_context_edge_layers(context_resources, core_op_metadata, resources_manager, hw_arch); CHECK_SUCCESS(status); @@ -1197,7 +1183,7 @@ static hailo_status add_edge_layers_actions_to_batch_switch_context(ContextResou static hailo_status fill_batch_switching_context_config_recepies_for_multi_context( ContextResources &context_resources, const CoreOpMetadata &core_op_metadata, ResourcesManager &resources_manager, - const ProtoHEFHwArch &hw_arch) + const HEFHwArch &hw_arch) { std::vector actions; @@ -1210,10 +1196,10 @@ static hailo_status fill_batch_switching_context_config_recepies_for_multi_conte status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } -static hailo_status fill_preliminary_config_recepies_for_multi_context(const ProtoHEFHwArch &hw_arch, +static hailo_status fill_preliminary_config_recepies_for_multi_context(const HEFHwArch &hw_arch, ContextResources &context_resources, ResourcesManager &resources_manager, std::shared_ptr core_op_metadata, const ContextMetadata &preliminary_context, bool is_single_context) @@ -1231,7 +1217,7 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const Pro // Parse preliminary config std::vector actions = preliminary_context.get_actions(); - const auto support_pre_fetch = is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); + const 
auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); CHECK_SUCCESS(status); @@ -1247,14 +1233,12 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const Pro status = handle_repeated_actions(actions); CHECK_SUCCESS(status); - return write_action_list(context_resources, context_resources.builder(), actions); + return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } - - Expected> ResourcesManagerBuilder::build(uint8_t current_core_op_index, VdmaDevice &device, HailoRTDriver &driver, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op_metadata, const ProtoHEFHwArch &hw_arch) + std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch, std::shared_ptr shef_file_handle) { const auto num_contexts = core_op_metadata->dynamic_contexts().size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS; @@ -1278,13 +1262,24 @@ Expected> ResourcesManagerBuilder::build(uint8 auto status = create_boundary_channels(resources_manager.value(), *core_op_metadata); CHECK_SUCCESS_AS_EXPECTED(status); - auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION); + status = resources_manager->fill_internal_buffers_info(); + CHECK_SUCCESS_AS_EXPECTED(status); + + // No allocation of edge layers in the activation context. No need for context index here + auto INVALID_CONTEXT_INDEX = static_cast<uint16_t>(UINT16_MAX); + auto ACTIVATION_CONTEXT_INDEX = INVALID_CONTEXT_INDEX; + + auto activation_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION, + ACTIVATION_CONTEXT_INDEX); CHECK_EXPECTED(activation_context); status = fill_activation_config_recepies_for_multi_context(activation_context.value().get(), resources_manager.value(), core_op_metadata, hw_arch); CHECK_SUCCESS_AS_EXPECTED(status); - auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING); + // No allocation of edge layers in the batch switching context.
No need for context index here + auto BATCH_SWITCH_CONTEXT_INDEX = INVALID_CONTEXT_INDEX; + auto batch_switching_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING, + BATCH_SWITCH_CONTEXT_INDEX); CHECK_EXPECTED(batch_switching_context); status = fill_batch_switching_context_config_recepies_for_multi_context(batch_switching_context.value().get(), *core_op_metadata, resources_manager.value(), hw_arch); @@ -1292,17 +1287,25 @@ Expected> ResourcesManagerBuilder::build(uint8 const bool is_single_context = core_op_metadata->dynamic_contexts().size() == 1; + if (nullptr != shef_file_handle) { + // We will start reading CCWs from the HEF file, so we need to open it + status = shef_file_handle->open(); + CHECK_SUCCESS_AS_EXPECTED(status); + } + + auto PRELIMINARY_CONTEXT_INDEX = static_cast<uint16_t>(0); auto preliminary_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_PRELIMINARY, - core_op_metadata->preliminary_context().config_buffers_info()); + PRELIMINARY_CONTEXT_INDEX, core_op_metadata->preliminary_context().config_buffers_info()); CHECK_EXPECTED(preliminary_context); status = fill_preliminary_config_recepies_for_multi_context(hw_arch, preliminary_context.value().get(), resources_manager.value(), core_op_metadata, core_op_metadata->preliminary_context(), is_single_context); CHECK_SUCCESS_AS_EXPECTED(status); - uint8_t context_index = 0; + uint16_t context_index = 0; + auto FIRST_DYNAMIC_CONTEXT_INDEX = 1; for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) { auto new_context = resources_manager->add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC, - context_metadata.config_buffers_info()); + static_cast<uint16_t>(FIRST_DYNAMIC_CONTEXT_INDEX + context_index), context_metadata.config_buffers_info()); CHECK_EXPECTED(new_context); status = fill_context_recipes_for_multi_context(hw_arch, new_context.value().get(), resources_manager.value(), @@ -1313,6 +1316,11 @@ Expected> ResourcesManagerBuilder::build(uint8 context_index++; } + if (nullptr != shef_file_handle) { + status = shef_file_handle->close(); + CHECK_SUCCESS_AS_EXPECTED(status); + } + status = resources_manager->configure(); CHECK_SUCCESS_AS_EXPECTED(status); diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp index e596cb06..a97f9552 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp @@ -10,21 +10,20 @@ #ifndef _HAILO_RESOURCE_MANAGER_BUILDER_HPP_ #define _HAILO_RESOURCE_MANAGER_BUILDER_HPP_ -#include "hef/hef_internal.hpp" #include "core_op/resource_manager/resource_manager.hpp" namespace hailort { +class ShefFileHandle; class ResourcesManagerBuilder final { public: ResourcesManagerBuilder() = delete; - /* TODO HRT-5067 - work with hailo_device_architecture_t instead of ProtoHEFHwArch */ static Expected> build(uint8_t net_group_index, VdmaDevice &device, HailoRTDriver &driver, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op, const ProtoHEFHwArch &hw_arch); + std::shared_ptr core_op, const HEFHwArch &hw_arch, std::shared_ptr shef_file_handle); }; diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp index 1ad286ce..ae9249b3 100644 --- a/hailort/libhailort/src/device_common/control.cpp +++
b/hailort/libhailort/src/device_common/control.cpp @@ -10,7 +10,8 @@ #include "common/utils.hpp" #include "common/logger_macros.hpp" -#include "hef/hef_internal.hpp" +#include "hailo/hailort_common.hpp" +#include "hef/core_op_metadata.hpp" #include "device_common/control.hpp" #include "hw_consts.hpp" #include "utils/soc_utils/partial_cluster_reader.hpp" @@ -91,9 +92,8 @@ Expected control__parse_identify_results(CONTROL_PROTOC // Device architecture can be HAILO_ARCH_HAILO15H or HAILO_ARCH_HAILO15M - but the FW will always return HAILO_ARCH_HAILO15H // Based on a file the SCU gives us we can deduce the actual type if (HAILO_ARCH_HAILO15H == board_info.device_architecture) { - auto dev_arch_exp = PartialClusterReader::get_actual_dev_arch_from_fuse(board_info.device_architecture); - CHECK_EXPECTED(dev_arch_exp); - board_info.device_architecture = dev_arch_exp.release(); + TRY(const auto dev_arch, PartialClusterReader::get_actual_dev_arch_from_fuse(board_info.device_architecture)); + board_info.device_architecture = dev_arch; } /* Write identify results to log */ @@ -208,6 +208,57 @@ hailo_status control__parse_core_identify_results(CONTROL_PROTOCOL__core_identif return HAILO_SUCCESS; } +hailo_status log_detailed_fw_error(const Device &device, const CONTROL_PROTOCOL__status_t &fw_status, const CONTROL_PROTOCOL__OPCODE_t opcode) +{ + const char *firmware_status_text = NULL; + // Special care for user_config_examine - warning log will be printed if not loaded, since it can happen on happy-flow (e.g. no EEPROM) + if ((fw_status.major_status == CONTROL_PROTOCOL_STATUS_USER_CONFIG_EXAMINE_FAILED) && + (fw_status.minor_status == FIRMWARE_CONFIGS_STATUS_USER_CONFIG_NOT_LOADED)) { + LOGGER__WARNING("Failed to examine user config, as it is not loaded or is not supported by the device."); + } + + LOGGER__ERROR("Firmware control has failed. Major status: {:#x}, Minor status: {:#x}", + fw_status.major_status, + fw_status.minor_status); + auto common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.major_status, &firmware_status_text); + if (HAILO_COMMON_STATUS__SUCCESS == common_status) { + LOGGER__ERROR("Firmware major status: {}", firmware_status_text); + } else { + LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", + (FIRMWARE_STATUS_t)fw_status.major_status, common_status); + } + common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.minor_status, &firmware_status_text); + if (HAILO_COMMON_STATUS__SUCCESS == common_status) { + LOGGER__ERROR("Firmware minor status: {}", firmware_status_text); + } else { + LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", + (FIRMWARE_STATUS_t)fw_status.minor_status, common_status); + } + + if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) { + auto device_arch = device.get_architecture(); + auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch"; + LOGGER__ERROR("Opcode {} is not supported on the device." 
\ + " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW", + CONTROL_PROTOCOL__get_textual_opcode(opcode), dev_arch_str); + } + + if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) || + (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) { + LOGGER__ERROR("Opcode {} is not supported on the current board.", CONTROL_PROTOCOL__get_textual_opcode(opcode)); + return HAILO_UNSUPPORTED_OPCODE; + } + + if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) || + (HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) { + LOGGER__ERROR("Opcode {} is not supported", CONTROL_PROTOCOL__get_textual_opcode(opcode)); + return HAILO_UNSUPPORTED_OPCODE; + } + + return HAILO_FW_CONTROL_FAILURE; +} + hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t message_size, CONTROL_PROTOCOL__response_header_t **header, CONTROL_PROTOCOL__payload_t **payload, CONTROL_PROTOCOL__request_t *request, Device &device) @@ -215,7 +266,6 @@ hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t mes hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; CONTROL_PROTOCOL__status_t fw_status = {}; - const char *firmware_status_text = NULL; /* Parse the response */ common_status = CONTROL_PROTOCOL__parse_response(message, message_size, header, payload, &fw_status); @@ -228,51 +278,12 @@ hailo_status Control::parse_and_validate_response(uint8_t *message, uint32_t mes if (HAILO_SUCCESS != status) { goto exit; } - /* Valdiate response was succesfull - both major and minor should be error free */ + /* Validate response was successful - both major and minor should be error free */ if (0 != fw_status.major_status) { - status = HAILO_FW_CONTROL_FAILURE; - LOGGER__ERROR("Firmware control has failed. Major status: {:#x}, Minor status: {:#x}", - fw_status.major_status, - fw_status.minor_status); - common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.major_status, &firmware_status_text); - if (HAILO_COMMON_STATUS__SUCCESS == common_status) { - LOGGER__ERROR("Firmware major status: {}", firmware_status_text); - } else { - LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", - (FIRMWARE_STATUS_t)fw_status.major_status, common_status); - } - common_status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)fw_status.minor_status, &firmware_status_text); - if (HAILO_COMMON_STATUS__SUCCESS == common_status) { - LOGGER__ERROR("Firmware minor status: {}", firmware_status_text); - } else { - LOGGER__ERROR("Cannot find textual address for firmware status {:#x}, common_status = {}", - (FIRMWARE_STATUS_t)fw_status.minor_status, common_status); - } - - if ((CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.minor_status) || - (CONTROL_PROTOCOL_STATUS_CONTROL_UNSUPPORTED == fw_status.major_status)) { - auto device_arch = device.get_architecture(); - auto dev_arch_str = (device_arch) ? HailoRTCommon::get_device_arch_str(*device_arch) : "Unable to parse arch"; - LOGGER__ERROR("Opcode {} is not supported on the device." 
\ - " This error usually occurs when the control is not supported for the device arch - ({}), or not compiled to the FW", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode)), - dev_arch_str); - } - - if ((CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.minor_status) || - (CONTROL_PROTOCOL_STATUS_UNSUPPORTED_DEVICE == fw_status.major_status)) { - LOGGER__ERROR("Opcode {} is not supported on the current board.", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); - } - - if ((HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.minor_status) || - (HAILO_CONTROL_STATUS_UNSUPPORTED_OPCODE == fw_status.major_status)) { - status = HAILO_UNSUPPORTED_OPCODE; - LOGGER__ERROR("Opcode {} is not supported", - CONTROL_PROTOCOL__get_textual_opcode((CONTROL_PROTOCOL__OPCODE_t)BYTE_ORDER__ntohl(request->header.common_header.opcode))); - } - + status = log_detailed_fw_error(device, fw_status, + static_cast(BYTE_ORDER__ntohl(request->header.common_header.opcode))); goto exit; + } /* Validate response opcode is same as request */ @@ -2382,7 +2393,7 @@ hailo_status Control::context_switch_set_network_group_header(Device &device, } hailo_status Control::context_switch_set_context_info_chunk(Device &device, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info) + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t &context_info) { hailo_status status = HAILO_UNINITIALIZED; HAILO_COMMON_STATUS_t common_status = HAILO_COMMON_STATUS__UNINITIALIZED; @@ -2422,7 +2433,7 @@ hailo_status Control::context_switch_set_context_info_chunk(Device &device, } hailo_status Control::context_switch_set_context_info(Device &device, - const std::vector &context_infos) + const std::vector &context_infos) { for (const auto &context_info : context_infos) { auto status = context_switch_set_context_info_chunk(device, context_info); @@ -2543,7 +2554,7 @@ hailo_status Control::set_pause_frames(Device &device, uint8_t rx_pause_frames_e } hailo_status Control::download_context_action_list_chunk(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter) { @@ -2614,7 +2625,7 @@ hailo_status Control::download_context_action_list_chunk(Device &device, uint32_ } hailo_status Control::download_context_action_list(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, size_t action_list_max_size, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter) { hailo_status status = HAILO_UNINITIALIZED; @@ -3073,14 +3084,10 @@ Expected Control::get_partial_clusters_layout_bitmap(Device &device) return std::stoi(std::string(force_layout_env)); } - auto dev_arch_exp = device.get_architecture(); - CHECK_EXPECTED(dev_arch_exp); - const auto dev_arch = dev_arch_exp.release(); + TRY(const auto dev_arch, device.get_architecture()); // In Both cases of Hailo15H and Hailo15M read fuse file (If no file 
found will return default value of all clusters) if ((HAILO_ARCH_HAILO15H == dev_arch) || (HAILO_ARCH_HAILO15M == dev_arch)) { - auto bitmap_exp = PartialClusterReader::get_partial_clusters_layout_bitmap(dev_arch); - CHECK_EXPECTED(bitmap_exp); - const auto bitmap = bitmap_exp.release(); + TRY(const auto bitmap, PartialClusterReader::get_partial_clusters_layout_bitmap(dev_arch)); if (PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15_DEFAULT == bitmap) { return Expected(PARTIAL_CLUSTERS_LAYOUT_IGNORE); } else { diff --git a/hailort/libhailort/src/device_common/control.hpp b/hailort/libhailort/src/device_common/control.hpp index 01a180fc..6b0ed3dc 100644 --- a/hailort/libhailort/src/device_common/control.hpp +++ b/hailort/libhailort/src/device_common/control.hpp @@ -280,7 +280,7 @@ class Control final */ // TODO: fix static hailo_status download_context_action_list(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, uint32_t *batch_counter); @@ -343,7 +343,7 @@ class Control final static hailo_status write_memory(Device &device, uint32_t address, const uint8_t *data, uint32_t data_length); static hailo_status read_memory(Device &device, uint32_t address, uint8_t *data, uint32_t data_length); static hailo_status context_switch_set_context_info(Device &device, - const std::vector &context_infos); + const std::vector &context_infos); static hailo_status context_switch_set_network_group_header(Device &device, const CONTROL_PROTOCOL__application_header_t &network_group_header); static hailo_status wd_enable(Device &device, uint8_t cpu_id, bool should_enable); @@ -398,11 +398,11 @@ class Control final uint8_t *buffer, uint32_t *actual_read_data_length); static hailo_status write_user_config_chunk(Device &device, uint32_t offset, const uint8_t *data, uint32_t chunk_size); static hailo_status download_context_action_list_chunk(Device &device, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset, size_t action_list_max_size, uint32_t *base_address, uint8_t *action_list, uint16_t *action_list_length, bool *is_action_list_end, uint32_t *batch_counter); static hailo_status context_switch_set_context_info_chunk(Device &device, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t &context_info); + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t &context_info); static hailo_status change_context_switch_status(Device &device, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count); diff --git a/hailort/libhailort/src/device_common/control_protocol.cpp b/hailort/libhailort/src/device_common/control_protocol.cpp index aad1a2b7..af0ad08a 100644 --- a/hailort/libhailort/src/device_common/control_protocol.cpp +++ b/hailort/libhailort/src/device_common/control_protocol.cpp @@ -59,6 +59,10 @@ const char *CONTROL_PROTOCOL__get_textual_opcode(CONTROL_PROTOCOL__OPCODE_t opco #define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (5) +#define CHECK_NOT_NULL_COMMON_STATUS(arg, status) _CHECK(nullptr != (arg), (status), "CHECK_NOT_NULL for {} failed", 
#arg) +#define CHECK_COMMON_STATUS(cond, ret_val, ...) \ + _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__)) + /* Functions declarations */ HAILO_COMMON_STATUS_t control_protocol__parse_message(uint8_t *message, uint32_t message_size, @@ -252,11 +256,11 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_fw_logger_request(CONTROL_PROTO { size_t local_request_size = 0; - CHECK(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(level <= (uint8_t) CONTROL_PROTOCOL__FW_MAX_LOGGER_LEVEL, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); - CHECK(interface_mask <= CONTROL_PROTOCOL__FW_MAX_LOGGER_INTERFACE, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); + CHECK_COMMON_STATUS(level <= (uint8_t) CONTROL_PROTOCOL__FW_MAX_LOGGER_LEVEL, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); + CHECK_COMMON_STATUS(interface_mask <= CONTROL_PROTOCOL__FW_MAX_LOGGER_INTERFACE, HAILO_STATUS__CONTROL_PROTOCOL__INVALID_ARGUMENT); static_assert((uint32_t) FW_LOGGER_LEVEL_TRACE == (uint32_t) HAILO_FW_LOGGER_LEVEL_TRACE, "mismatch in FW_LOGGER_LEVEL_TRACE and HAILO_FW_LOGGER_LEVEL_TRACE"); @@ -294,8 +298,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_throttling_state_request(CONTRO { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_throttling_state_request_t); @@ -318,8 +322,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_overcurrent_state_request(CONTR { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_overcurrent_state_request_t); @@ -347,8 +351,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_clock_freq_request(CONTROL_PROT { size_t local_request_size = 0; - CHECK(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_COMMON_STATUS(request_size != nullptr, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_clock_freq_request_t); @@ -1663,7 +1667,7 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_network_group_he HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, 
uint32_t sequence, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t *context_info) + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *context_info) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; @@ -1678,17 +1682,17 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_req sizeof(CONTROL_PROTOCOL__context_switch_set_context_info_request_t) + context_info->context_network_data_length; control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CONTEXT_SWITCH_SET_CONTEXT_INFO, 4); - /* is_first_control_per_context */ - request->parameters.context_switch_set_context_info_request.is_first_control_per_context_length = - BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_first_control_per_context)); - request->parameters.context_switch_set_context_info_request.is_first_control_per_context = - context_info->is_first_control_per_context; + /* is_first_chunk_per_context */ + request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context_length = + BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context)); + request->parameters.context_switch_set_context_info_request.is_first_chunk_per_context = + context_info->is_first_chunk_per_context; - /* is_last_control_per_context */ - request->parameters.context_switch_set_context_info_request.is_last_control_per_context_length = - BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_last_control_per_context)); - request->parameters.context_switch_set_context_info_request.is_last_control_per_context = - context_info->is_last_control_per_context; + /* is_last_chunk_per_context */ + request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context_length = + BYTE_ORDER__htonl(sizeof(request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context)); + request->parameters.context_switch_set_context_info_request.is_last_chunk_per_context = + context_info->is_last_chunk_per_context; /* context_type */ request->parameters.context_switch_set_context_info_request.context_type_length = @@ -1751,8 +1755,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_pause_frames_request(CONTROL_PR size_t *request_size, uint32_t sequence, uint8_t rx_pause_frames_enable) { - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ size_t local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__set_pause_frames_t); @@ -1770,7 +1774,7 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_pause_frames_request(CONTROL_PR HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_download_context_action_list_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset) { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; 
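The renames in this hunk (is_first/last_control_per_context becoming is_first/last_chunk_per_context, operating on the _chunk_t struct) make the transport unit explicit: one context's serialized action list may span several chunks, each flagged so the receiver can tell where a context starts and ends. Below is a minimal standalone sketch of that chunking contract; the struct, helper, and sizes are illustrative stand-ins, not the real protocol definitions.

```cpp
// Hypothetical sketch: split one context's action list into ordered chunks,
// flagging the first and last chunk so the receiver can reassemble them.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct ContextInfoChunk {
    bool is_first_chunk_per_context;
    bool is_last_chunk_per_context;
    std::vector<uint8_t> data;
};

std::vector<ContextInfoChunk> split_context(const std::vector<uint8_t> &action_list,
                                            size_t max_chunk_size) {
    std::vector<ContextInfoChunk> chunks;
    size_t offset = 0;
    do {
        const size_t size = std::min(max_chunk_size, action_list.size() - offset);
        ContextInfoChunk chunk{};
        chunk.is_first_chunk_per_context = (0 == offset);
        chunk.is_last_chunk_per_context = (offset + size == action_list.size());
        chunk.data.assign(action_list.begin() + offset, action_list.begin() + offset + size);
        chunks.push_back(std::move(chunk));
        offset += size;
    } while (offset < action_list.size());
    return chunks;
}

int main() {
    std::vector<uint8_t> action_list(2500, 0xAB); // stand-in for serialized actions
    const size_t MAX_CHUNK = 1024;                // illustrative, not the FW limit
    for (const auto &chunk : split_context(action_list, MAX_CHUNK)) {
        std::printf("chunk: %zu bytes, first=%d last=%d\n",
                    chunk.data.size(), chunk.is_first_chunk_per_context,
                    chunk.is_last_chunk_per_context);
    }
    return 0;
}
```

Using a do/while keeps the degenerate case well-formed: an empty action list still yields exactly one chunk with both flags set, so the receiver never has to special-case a context with no data.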
@@ -2325,8 +2329,8 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_run_bist_test_request( { size_t local_request_size = 0; - CHECK_NOT_NULL(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); - CHECK_NOT_NULL(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); + CHECK_NOT_NULL_COMMON_STATUS(request_size, HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + diff --git a/hailort/libhailort/src/device_common/control_protocol.hpp b/hailort/libhailort/src/device_common/control_protocol.hpp index ae0b9674..5fb914fd 100644 --- a/hailort/libhailort/src/device_common/control_protocol.hpp +++ b/hailort/libhailort/src/device_common/control_protocol.hpp @@ -97,12 +97,12 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_network_group_he const CONTROL_PROTOCOL__application_header_t *network_group_header); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_context_switch_set_context_info_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, - const CONTROL_PROTOCOL__context_switch_context_info_single_control_t *context_info); + const CONTROL_PROTOCOL__context_switch_context_info_chunk_t *context_info); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_idle_time_set_measuremment_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t measurement_enable); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_idle_time_get_measuremment_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_download_context_action_list_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint32_t network_group_id, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint8_t context_index, uint16_t action_list_offset); + CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, uint16_t action_list_offset); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_context_switch_status_request( CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, CONTROL_PROTOCOL__CONTEXT_SWITCH_STATUS_t state_machine_status, uint8_t application_index, diff --git a/hailort/libhailort/src/device_common/d2h_events_parser.cpp b/hailort/libhailort/src/device_common/d2h_events_parser.cpp index 5ac599fa..e60f7c36 100644 --- a/hailort/libhailort/src/device_common/d2h_events_parser.cpp +++ b/hailort/libhailort/src/device_common/d2h_events_parser.cpp @@ -30,6 +30,9 @@ using namespace hailort; /* Function prototype for control operations */ typedef HAILO_COMMON_STATUS_t (*firmware_notifications_parser_t) (D2H_EVENT_MESSAGE_t *d2h_notification_message); +#define CHECK_COMMON_STATUS(cond, ret_val, ...) 
\ + _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__)) + /********************************************************************** * Private Declarations **********************************************************************/ @@ -328,11 +331,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_cpu_ecc_error_noti { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; - CHECK(D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_HEALTH_MONITOR_CPU_ECC_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(sizeof(d2h_notification_message->message_parameters.health_monitor_cpu_ecc_event) == d2h_notification_message->header.payload_length, + CHECK_COMMON_STATUS(sizeof(d2h_notification_message->message_parameters.health_monitor_cpu_ecc_event) == d2h_notification_message->header.payload_length, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -374,11 +377,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_breakpoint_reached { HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; - CHECK(D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_CONTEXT_SWITCH_BREAKPOINT_REACHED_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(d2h_notification_message->header.payload_length == + CHECK_COMMON_STATUS(d2h_notification_message->header.payload_length == sizeof(d2h_notification_message->message_parameters.context_switch_breakpoint_reached_event), HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -400,11 +403,11 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_run_time_error_not const char *run_time_error_status_text = NULL; uint32_t run_time_error_status = 0; - CHECK(D2H_EVENT_CONTEXT_SWITCH_RUN_TIME_ERROR_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, + CHECK_COMMON_STATUS(D2H_EVENT_CONTEXT_SWITCH_RUN_TIME_ERROR_EVENT_PARAMETER_COUNT == d2h_notification_message->header.parameter_count, HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_COUNT, "d2h event invalid parameter count: {}", d2h_notification_message->header.parameter_count); - CHECK(d2h_notification_message->header.payload_length == + CHECK_COMMON_STATUS(d2h_notification_message->header.payload_length == sizeof(d2h_notification_message->message_parameters.context_switch_run_time_error_event), HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH, "d2h event invalid payload_length: {}", d2h_notification_message->header.payload_length); @@ -412,7 +415,7 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_context_switch_run_time_error_not run_time_error_status = d2h_notification_message->message_parameters.context_switch_run_time_error_event.exit_status; status = FIRMWARE_STATUS__get_textual((FIRMWARE_STATUS_t)run_time_error_status, &run_time_error_status_text); - CHECK((HAILO_COMMON_STATUS__SUCCESS == status), status, + 
CHECK_COMMON_STATUS((HAILO_COMMON_STATUS__SUCCESS == status), status, "Cannot find textual address for run time status {:#x}, status = {}", (FIRMWARE_STATUS_t)run_time_error_status, status); LOGGER__ERROR("Got Context switch run time error on net_group index {}, batch index {}, context index {}, action index {} with status {}", diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp index 57f6fb39..71224599 100644 --- a/hailort/libhailort/src/device_common/device.cpp +++ b/hailort/libhailort/src/device_common/device.cpp @@ -387,7 +387,7 @@ hailo_status Device::set_sleep_state(hailo_sleep_state_t sleep_state) return Control::set_sleep_state(*this, sleep_state); } -hailo_status Device::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +hailo_status Device::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) { (void) address; (void) size; @@ -395,21 +395,14 @@ hailo_status Device::dma_map(void *address, size_t size, hailo_stream_direction_ return HAILO_NOT_IMPLEMENTED; } -hailo_status Device::dma_unmap(void *address, hailo_stream_direction_t direction) +hailo_status Device::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) { (void) address; + (void) size; (void) direction; return HAILO_NOT_IMPLEMENTED; } -Expected> Device::try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) -{ - (void) buffer; - (void) direction; - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - hailo_status Device::direct_write_memory(uint32_t address, const void *buffer, uint32_t size) { (void) address; @@ -538,7 +531,7 @@ Expected> Device::get_number_of_dynamic_contexts_per_networ } Expected Device::download_context_action_list(uint32_t network_group_id, uint8_t context_type, - uint8_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size) + uint16_t context_index, uint32_t *base_address, uint32_t *batch_counter, uint16_t max_size) { CHECK_ARG_NOT_NULL_AS_EXPECTED(base_address); CHECK_ARG_NOT_NULL_AS_EXPECTED(batch_counter); @@ -575,7 +568,7 @@ hailo_status Device::set_context_action_list_timestamp_batch(uint16_t batch_inde hailo_status Device::set_context_switch_breakpoint(uint8_t breakpoint_id, bool break_at_any_network_group_index, uint8_t network_group_index, bool break_at_any_batch_index, uint16_t batch_index, bool break_at_any_context_index, - uint8_t context_index, bool break_at_any_action_index, uint16_t action_index) + uint16_t context_index, bool break_at_any_action_index, uint16_t action_index) { CONTROL_PROTOCOL__context_switch_breakpoint_data_t breakpoint_data = { break_at_any_network_group_index, diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp index eca7a3a7..1fa6d72e 100644 --- a/hailort/libhailort/src/device_common/device_internal.cpp +++ b/hailort/libhailort/src/device_common/device_internal.cpp @@ -15,7 +15,7 @@ #include "device_common/device_internal.hpp" #include "network_group/network_group_internal.hpp" #include "utils/sensor_config_utils.hpp" - +#include "hef/hef_internal.hpp" namespace hailort { @@ -26,7 +26,8 @@ DeviceBase::DeviceBase(Type type) : m_d2h_notification_thread(), m_notif_fetch_thread_params(make_shared_nothrow()), m_d2h_callbacks{{0,0}}, - m_callbacks_lock() + m_callbacks_lock(), + m_is_shutdown_core_ops_called(false) // TODO: Handle m_notif_fetch_thread_params null pointer { #ifndef NDEBUG @@ -565,8 +566,17 @@ void 
DeviceBase::d2h_notification_thread_main(const std::string &device_id) continue; } - hailo_notification_t callback_notification; uint32_t notification_fw_id = notification.header.event_id; + + if (HEALTH_MONITOR_CLOSED_STREAMS_D2H_EVENT_ID == notification_fw_id) { + if (!m_is_shutdown_core_ops_called) { + LOGGER__WARNING("Aborting Infer, Device {} got closed streams notification from \'Health Monitor\'", device_id); + shutdown_core_ops(); + m_is_shutdown_core_ops_called = true; + } + } + + hailo_notification_t callback_notification; hailo_notification_id_t hailo_notification_id; hailo_status status = fw_notification_id_to_hailo((D2H_EVENT_ID_t)notification_fw_id, &hailo_notification_id); if (HAILO_SUCCESS != status) { @@ -600,9 +610,10 @@ hailo_status DeviceBase::check_hef_is_compatible(Hef &hef) const auto device_arch = get_architecture(); CHECK_EXPECTED_AS_STATUS(device_arch, "Can't get device architecture (is the FW loaded?)"); - if (!is_hef_compatible(device_arch.value(), hef.pimpl->get_device_arch())) { + if (!is_hef_compatible(device_arch.value(), static_cast(hef.pimpl->get_device_arch()))) { auto device_arch_str = HailoRTCommon::get_device_arch_str(device_arch.value()); - auto hef_arch_str = HailoRTCommon::get_device_arch_str(hef_arch_to_device_arch(hef.pimpl->get_device_arch())); + auto hef_arch_str = + HailoRTCommon::get_device_arch_str(hef_arch_to_device_arch(static_cast(hef.pimpl->get_device_arch()))); LOGGER__ERROR("HEF format is not compatible with device. Device arch: {}, HEF arch: {}", device_arch_str.c_str(), hef_arch_str.c_str()); @@ -615,16 +626,19 @@ hailo_status DeviceBase::check_hef_is_compatible(Hef &hef) CHECK_EXPECTED_AS_STATUS(extended_device_info_expected, "Can't get device extended info"); hailo_extended_device_information_t extended_device_information = extended_device_info_expected.release(); check_clock_rate_for_hailo8(extended_device_information.neural_network_core_clock_rate, - hef.pimpl->get_device_arch()); + static_cast(hef.pimpl->get_device_arch())); } - if ((ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8L == hef.pimpl->get_device_arch()) && (HAILO_ARCH_HAILO8 == device_arch.value())) { - LOGGER__WARNING( - "HEF was compiled for Hailo8L device, while the device itself is Hailo8. " \ - "This will result in lower performance."); + if ((static_cast(HEFHwArch::HW_ARCH__HAILO8L) == hef.pimpl->get_device_arch()) && + (HAILO_ARCH_HAILO8 == device_arch.value())) { + LOGGER__WARNING("HEF was compiled for Hailo8L device, while the device itself is Hailo8. " \ + "This will result in lower performance."); + } else if ((static_cast(HEFHwArch::HW_ARCH__HAILO15M) == hef.pimpl->get_device_arch()) && + (HAILO_ARCH_HAILO15H == device_arch.value())) { + LOGGER__WARNING("HEF was compiled for Hailo15M device, while the device itself is Hailo15H. 
" \ + "This will result in lower performance."); } - return HAILO_SUCCESS; } @@ -714,46 +728,46 @@ hailo_status DeviceBase::validate_fw_version_for_platform(const hailo_device_ide return validate_binary_version_for_platform(&fw_version, &min_supported_fw_version, fw_binary_type); } -bool DeviceBase::is_hef_compatible(hailo_device_architecture_t device_arch, ProtoHEFHwArch hef_arch) +bool DeviceBase::is_hef_compatible(hailo_device_architecture_t device_arch, HEFHwArch hef_arch) { switch (device_arch) { case HAILO_ARCH_HAILO8: - return (hef_arch == PROTO__HW_ARCH__HAILO8P) || (hef_arch == PROTO__HW_ARCH__HAILO8R) || (hef_arch == PROTO__HW_ARCH__HAILO8L); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO8P) || (hef_arch == HEFHwArch::HW_ARCH__HAILO8R) || (hef_arch == HEFHwArch::HW_ARCH__HAILO8L); case HAILO_ARCH_HAILO8L: - return (hef_arch == PROTO__HW_ARCH__HAILO8L); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO8L); case HAILO_ARCH_HAILO15H: // Compare with HW_ARCH__LAVENDER and HW_ARCH__GINGER to support hefs compiled for them - return (hef_arch == PROTO__HW_ARCH__GINGER) || (hef_arch == PROTO__HW_ARCH__LAVENDER) || - (hef_arch == PROTO__HW_ARCH__HAILO15H) || (hef_arch == PROTO__HW_ARCH__HAILO15M); + return (hef_arch == HEFHwArch::HW_ARCH__GINGER) || (hef_arch == HEFHwArch::HW_ARCH__LAVENDER) || + (hef_arch == HEFHwArch::HW_ARCH__HAILO15H) || (hef_arch == HEFHwArch::HW_ARCH__HAILO15M); case HAILO_ARCH_PLUTO: - return (hef_arch == PROTO__HW_ARCH__PLUTO); + return (hef_arch == HEFHwArch::HW_ARCH__PLUTO); case HAILO_ARCH_HAILO15M: - return (hef_arch == PROTO__HW_ARCH__HAILO15M); + return (hef_arch == HEFHwArch::HW_ARCH__HAILO15M); default: return false; } } -hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(ProtoHEFHwArch hef_arch) +hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(HEFHwArch hef_arch) { switch (hef_arch) { - case PROTO__HW_ARCH__SAGE_A0: + case HEFHwArch::HW_ARCH__SAGE_A0: return HAILO_ARCH_HAILO8_A0; - case PROTO__HW_ARCH__HAILO8: - case PROTO__HW_ARCH__HAILO8P: - case PROTO__HW_ARCH__HAILO8R: - case PROTO__HW_ARCH__SAGE_B0: - case PROTO__HW_ARCH__PAPRIKA_B0: + case HEFHwArch::HW_ARCH__HAILO8: + case HEFHwArch::HW_ARCH__HAILO8P: + case HEFHwArch::HW_ARCH__HAILO8R: + case HEFHwArch::HW_ARCH__SAGE_B0: + case HEFHwArch::HW_ARCH__PAPRIKA_B0: return HAILO_ARCH_HAILO8; - case PROTO__HW_ARCH__HAILO8L: + case HEFHwArch::HW_ARCH__HAILO8L: return HAILO_ARCH_HAILO8L; - case PROTO__HW_ARCH__HAILO15H: - case PROTO__HW_ARCH__GINGER: - case PROTO__HW_ARCH__LAVENDER: + case HEFHwArch::HW_ARCH__HAILO15H: + case HEFHwArch::HW_ARCH__GINGER: + case HEFHwArch::HW_ARCH__LAVENDER: return HAILO_ARCH_HAILO15H; - case PROTO__HW_ARCH__PLUTO: + case HEFHwArch::HW_ARCH__PLUTO: return HAILO_ARCH_PLUTO; - case PROTO__HW_ARCH__HAILO15M: + case HEFHwArch::HW_ARCH__HAILO15M: return HAILO_ARCH_HAILO15M; default: @@ -761,9 +775,9 @@ hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(ProtoHEFHwArch h } } -void DeviceBase::check_clock_rate_for_hailo8(uint32_t clock_rate, ProtoHEFHwArch hef_hw_arch) +void DeviceBase::check_clock_rate_for_hailo8(uint32_t clock_rate, HEFHwArch hef_hw_arch) { - uint32_t expected_clock_rate = (hef_hw_arch == ProtoHEFHwArch::PROTO__HW_ARCH__HAILO8R) ? HAILO8R_CLOCK_RATE : HAILO8_CLOCK_RATE; + uint32_t expected_clock_rate = (hef_hw_arch == HEFHwArch::HW_ARCH__HAILO8R) ? 
HAILO8R_CLOCK_RATE : HAILO8_CLOCK_RATE; if (expected_clock_rate != clock_rate) { LOGGER__WARNING( "HEF was compiled assuming clock rate of {} MHz, while the device clock rate is {} MHz. " \ diff --git a/hailort/libhailort/src/device_common/device_internal.hpp b/hailort/libhailort/src/device_common/device_internal.hpp index 8ffe7671..29ac623e 100644 --- a/hailort/libhailort/src/device_common/device_internal.hpp +++ b/hailort/libhailort/src/device_common/device_internal.hpp @@ -23,7 +23,6 @@ #include "hailo/hailort.h" #include "d2h_event_queue.hpp" -#include "hef/hef_internal.hpp" #include "firmware_header.h" #include "firmware_header_utils.h" @@ -43,6 +42,23 @@ namespace hailort #define CLOCKS_IN_MHZ (1000 * 1000) +enum class HEFHwArch // Must be aligned to ProtoHEFHwArch +{ + HW_ARCH__HAILO8 = 0, + HW_ARCH__HAILO8P = 1, + HW_ARCH__HAILO8R = 2, + HW_ARCH__HAILO8L = 3, + HW_ARCH__HAILO15H = 103, + HW_ARCH__HAILO15M = 4, + + HW_ARCH__SAGE_A0 = 100, + HW_ARCH__SAGE_B0 = 101, + HW_ARCH__PAPRIKA_B0 = 102, + HW_ARCH__GINGER = 104, + HW_ARCH__LAVENDER = 105, + HW_ARCH__PLUTO = 106, +}; + class DeviceBase : public Device { public: @@ -81,7 +97,7 @@ class DeviceBase : public Device virtual Expected read_user_config() override; virtual hailo_status write_user_config(const MemoryView &buffer) override; virtual hailo_status erase_user_config() override; - static hailo_device_architecture_t hef_arch_to_device_arch(ProtoHEFHwArch hef_arch); + static hailo_device_architecture_t hef_arch_to_device_arch(HEFHwArch hef_arch); virtual Expected get_architecture() const override { @@ -101,6 +117,7 @@ class DeviceBase : public Device // Special value to signal the d2h notification thread to terminate static const uint32_t TERMINATE_EVENT_ID = std::numeric_limits::max(); + virtual void shutdown_core_ops() = 0; virtual hailo_reset_device_mode_t get_default_reset_mode() = 0; virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) = 0; virtual Expected read_notification() = 0; @@ -126,8 +143,8 @@ class DeviceBase : public Device firmware_version_t *min_supported_binary_version, FW_BINARY_TYPE_t fw_binary_type); static hailo_status validate_fw_version_for_platform(const hailo_device_identity_t &board_info, firmware_version_t fw_version, FW_BINARY_TYPE_t fw_binary_type); - static bool is_hef_compatible(hailo_device_architecture_t device_arch, ProtoHEFHwArch hw_arch); - static void check_clock_rate_for_hailo8(uint32_t clock_rate, ProtoHEFHwArch hef_hw_arch); + static bool is_hef_compatible(hailo_device_architecture_t device_arch, HEFHwArch hw_arch); + static void check_clock_rate_for_hailo8(uint32_t clock_rate, HEFHwArch hef_hw_arch); hailo_status store_sensor_control_buffers(const std::vector &control_buffers, uint32_t section_index, hailo_sensor_types_t sensor_type, uint32_t reset_config_size, uint16_t config_height, uint16_t config_width, uint16_t config_fps, const std::string &config_name); virtual void notification_fetch_thread(std::shared_ptr params); @@ -140,6 +157,7 @@ class DeviceBase : public Device d2h_notification_callback_t m_d2h_callbacks[HAILO_NOTIFICATION_ID_COUNT]; std::mutex m_callbacks_lock; + bool m_is_shutdown_core_ops_called; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/eth/eth_device.cpp b/hailort/libhailort/src/eth/eth_device.cpp index 764c87b8..c7685a50 100644 --- a/hailort/libhailort/src/eth/eth_device.cpp +++ b/hailort/libhailort/src/eth/eth_device.cpp @@ -20,6 +20,7 @@ #include "eth/udp.hpp" #include "device_common/control.hpp" #include 
"network_group/network_group_internal.hpp" +#include "hef/hef_internal.hpp" #include #include @@ -304,6 +305,17 @@ hailo_reset_device_mode_t EthernetDevice::get_default_reset_mode() return HAILO_RESET_DEVICE_MODE_CHIP; } +// TODO - HRT-13234, move to DeviceBase +void EthernetDevice::shutdown_core_ops() +{ + for (auto core_op : m_core_ops) { + auto status = core_op->shutdown(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to shutdown core op with status {}", status); + } + } +} + hailo_status EthernetDevice::reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) { hailo_status status = HAILO_UNINITIALIZED; diff --git a/hailort/libhailort/src/eth/eth_device.hpp b/hailort/libhailort/src/eth/eth_device.hpp index fca41f1b..880be87b 100644 --- a/hailort/libhailort/src/eth/eth_device.hpp +++ b/hailort/libhailort/src/eth/eth_device.hpp @@ -30,6 +30,7 @@ class EthernetDevice : public DeviceBase { virtual Expected read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override; virtual hailo_status wait_for_wakeup() override; virtual void increment_control_sequence() override; + virtual void shutdown_core_ops() override; virtual hailo_reset_device_mode_t get_default_reset_mode() override; virtual hailo_status reset_impl(CONTROL_PROTOCOL__reset_type_t reset_type) override; @@ -72,6 +73,7 @@ class EthernetDevice : public DeviceBase { const hailo_eth_device_info_t m_device_info; std::string m_device_id; Udp m_control_udp; + // TODO - HRT-13234, move to DeviceBase std::vector> m_core_ops; std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context ActiveCoreOpHolder m_active_core_op_holder; diff --git a/hailort/libhailort/src/eth/eth_stream.cpp b/hailort/libhailort/src/eth/eth_stream.cpp index 61164ebd..b84db336 100644 --- a/hailort/libhailort/src/eth/eth_stream.cpp +++ b/hailort/libhailort/src/eth/eth_stream.cpp @@ -134,7 +134,7 @@ Expected EthernetInputStream::sync_write_raw_buffer(const MemoryView &bu size_t size = buffer.size(); status = m_udp.send((uint8_t*)buffer.data(), &size, this->configuration.use_dataflow_padding, this->configuration.max_payload_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Udp send was aborted!"); return make_unexpected(status); } @@ -157,7 +157,7 @@ hailo_status EthernetInputStream::write_impl(const MemoryView &buffer) } else { status = eth_stream__write_all_no_sync(buffer.data(), offset, buffer.size()); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("eth_stream__write_all was aborted!"); return status; } @@ -191,7 +191,7 @@ hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *b while (offset < offset_end_without_remainder) { transfer_size = offset_end_without_remainder - offset; auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -200,7 +200,7 @@ hailo_status EthernetInputStream::eth_stream__write_with_remainder(const void *b } if (0 < remainder_size) { auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT 
== expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -236,7 +236,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(co transfer_size = offset_end_without_remainder - offset; auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, transfer_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -249,7 +249,7 @@ hailo_status TokenBucketEthernetInputStream::eth_stream__write_with_remainder(co (void)token_bucket.consumeWithBorrowAndWait(static_cast(remainder_size), rate_bytes_per_sec, BURST_SIZE); auto expected_bytes_written = sync_write_raw_buffer(MemoryView::create_const(static_cast(buffer) + offset, remainder_size)); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_written.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_written.status()) { LOGGER__INFO("sync_write_raw_buffer was aborted!"); return expected_bytes_written.status(); } @@ -316,7 +316,7 @@ hailo_status EthernetInputStream::eth_stream__write_all_with_sync(const void *bu for (size_t i = 0; i < number_of_frames; i++) { // Write frame by frame, whereas the remainder packet is the sync packet status = eth_stream__write_with_remainder(buffer, offset, frame_size, this->configuration.sync_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("eth_stream__write_with_remainder was aborted!"); return status; } @@ -515,7 +515,7 @@ hailo_status EthernetOutputStream::read_all_no_sync(void *buffer, size_t offset, transfer_size = offset_end - offset; MemoryView buffer_view(static_cast(buffer) + offset, transfer_size); auto expected_bytes_read = this->sync_read_raw_buffer(buffer_view); - if (HAILO_STREAM_ABORTED_BY_USER == expected_bytes_read.status()) { + if (HAILO_STREAM_ABORT == expected_bytes_read.status()) { LOGGER__INFO("sync_read_raw_buffer was aborted!"); return expected_bytes_read.status(); } @@ -555,7 +555,7 @@ hailo_status EthernetOutputStream::read_all_with_sync(void *buffer, size_t offse status = expected_bytes_read.status(); if (HAILO_TIMEOUT == status) { return handle_timeout(buffer, offset, initial_offset, frame_size); - } else if (HAILO_STREAM_ABORTED_BY_USER == status) { + } else if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("sync_read_raw_buffer was aborted"); return status; } else if (HAILO_SUCCESS != status) { @@ -658,7 +658,7 @@ hailo_status EthernetOutputStream::read_impl(MemoryView buffer) } else { status = this->read_all_no_sync(buffer.data(), 0, buffer.size()); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("read was aborted!"); return status; } @@ -676,7 +676,7 @@ Expected EthernetOutputStream::sync_read_raw_buffer(MemoryView &buffer) auto buffer_size = buffer.size(); status = m_udp.recv((uint8_t*)buffer.data(),&buffer_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Udp recv was aborted!"); return make_unexpected(status); } diff --git a/hailort/libhailort/src/eth/network_rate_calculator.cpp b/hailort/libhailort/src/eth/network_rate_calculator.cpp index 2d7def5b..76937d8f 100644 --- a/hailort/libhailort/src/eth/network_rate_calculator.cpp +++ b/hailort/libhailort/src/eth/network_rate_calculator.cpp @@ 
-117,10 +117,10 @@ Expected> NetworkUdpRateCalculator::calculate_in total_input_rate, total_output_rate, max_supported_bandwidth); if (total_output_rate > total_input_rate) { // Output is bigger than max rate. Adjusting input rate accordingly - auto input_output_ratio = (total_input_rate / total_output_rate); + double input_output_ratio = ((double)total_input_rate / total_output_rate); LOGGER__WARNING("Output Bps ({}) is bigger than input Bps ({}) output (ratio is: {})", total_output_rate, total_input_rate, input_output_ratio); - max_supported_bandwidth *= input_output_ratio; + max_supported_bandwidth = static_cast(input_output_ratio * max_supported_bandwidth); } auto total_inputs_rate_to_max_supported_ratio = (static_cast(max_supported_bandwidth) / total_input_rate); for (auto &rate_pair : input_rates) { diff --git a/hailort/libhailort/src/eth/udp.cpp b/hailort/libhailort/src/eth/udp.cpp index af97e7b9..1d820e48 100644 --- a/hailort/libhailort/src/eth/udp.cpp +++ b/hailort/libhailort/src/eth/udp.cpp @@ -129,7 +129,7 @@ hailo_status Udp::send(uint8_t *buffer, size_t *size, bool use_padding, size_t m status = m_socket.send_to((const uint8_t*)send_ptr, *size, MSG_CONFIRM, (const struct sockaddr *) &m_device_address, m_device_address_length, &number_of_sent_bytes); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Socket send_to was aborted!"); return status; } @@ -161,7 +161,7 @@ hailo_status Udp::recv(uint8_t *buffer, size_t *size) status = m_socket.recv_from(buffer, *size, 0, (struct sockaddr *) &m_device_address, m_device_address_length, &number_of_received_bytes); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Socket recv_from was aborted!"); return status; } diff --git a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp index 6f1faa88..96b1ce20 100644 --- a/hailort/libhailort/src/hailort.cpp +++ b/hailort/libhailort/src/hailort.cpp @@ -36,6 +36,7 @@ #include "vdevice/vdevice_internal.hpp" #include "utils/profiler/tracer_macros.hpp" #include "utils/exported_resource_manager.hpp" +#include "utils/buffer_storage.hpp" #include #include @@ -46,9 +47,10 @@ using namespace hailort; // Note: Async stream API uses BufferPtr as a param. When exporting BufferPtrs to the user via c-api, they must be // stored in some container, otherwise their ref count may reach zero and they will be freed, despite the // c-api user still using them. (shared_ptr doesn't have a release method like unique_ptr) -// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer/hailo_dma_map_buffer_to_device +// Singleton holding a mapping between the address of a buffer allocated/mapped via hailo_allocate_buffer // to the underlying BufferPtr. When a buffer is freed via hailo_free_buffer, the BufferPtr object will be removed from // the storage. 
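
Worth spelling out the network_rate_calculator.cpp fix above: with unsigned integer operands, total_input_rate / total_output_rate truncates toward zero, so whenever the output rate exceeded the input rate the ratio became 0 and max_supported_bandwidth was zeroed out. A standalone sketch with made-up rates (the values are illustrative, not taken from the patch):

    #include <cstdint>
    #include <iostream>

    int main() {
        // Hypothetical rates in Bps
        uint32_t total_input_rate = 300000000;
        uint32_t total_output_rate = 400000000;
        uint32_t max_supported_bandwidth = 850000000;

        // Old behavior: unsigned division truncates 0.75 down to 0
        uint32_t truncated_ratio = total_input_rate / total_output_rate;

        // Fixed behavior: promote to double first, then scale and cast back
        double ratio = static_cast<double>(total_input_rate) / total_output_rate;
        max_supported_bandwidth = static_cast<uint32_t>(ratio * max_supported_bandwidth);

        std::cout << truncated_ratio << "\n";          // prints 0
        std::cout << max_supported_bandwidth << "\n";  // prints 637500000
        return 0;
    }
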
+// TODO HRT-12726: remove the export manager using ExportedBufferManager = ExportedResourceManager; COMPAT__INITIALIZER(hailort__initialize_logger) @@ -1090,11 +1092,11 @@ hailo_status hailo_allocate_buffer(size_t size, const hailo_buffer_parameters_t CHECK_ARG_NOT_NULL(buffer_out); CHECK(0 != size, HAILO_INVALID_ARGUMENT, "Buffer size must be greater than zero"); - auto buffer_storage_params = BufferStorageParams::create(*allocation_params); - CHECK_EXPECTED_AS_STATUS(buffer_storage_params); + BufferStorageParams buffer_storage_params{}; + buffer_storage_params.flags = allocation_params->flags; // Create buffer - auto buffer = Buffer::create_shared(size, *buffer_storage_params); + auto buffer = Buffer::create_shared(size, buffer_storage_params); CHECK_EXPECTED_AS_STATUS(buffer); // Store the buffer in manager (otherwise it'll be freed at the end of this func) @@ -1112,45 +1114,34 @@ hailo_status hailo_free_buffer(void *buffer) return ExportedBufferManager::unregister_resource(buffer); } -// TODO: hailo_dma_map_buffer_to_device/hailo_dma_unmap_buffer_from_device aren't thread safe when crossed with +// TODO: hailo_device_dma_map_buffer/hailo_device_dma_unmap_buffer aren't thread safe when crossed with // hailo_allocate_buffer/hailo_free_buffer (HRT-10669) -hailo_status hailo_dma_map_buffer_to_device(void *buffer, size_t size, hailo_device device, hailo_dma_buffer_direction_t direction) +hailo_status hailo_device_dma_map_buffer(hailo_device device,void *address, size_t size, hailo_dma_buffer_direction_t direction) { - CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(device); - - auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); - if (hailort_allocated_buffer) { - // TODO: this will change here HRT-10983 - // The buffer has been allocated by hailort - // The mapping is held by the Buffer object - auto mapping_result = hailort_allocated_buffer->get()->storage().dma_map(*reinterpret_cast(device), direction); - CHECK_EXPECTED_AS_STATUS(mapping_result); - const auto is_new_mapping = mapping_result.value(); - return is_new_mapping ? HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; - } - - // The buffer has been allocated by the user - return reinterpret_cast(device)->dma_map(buffer, size, - (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(device)->dma_map(address, size, direction); } -hailo_status hailo_dma_unmap_buffer_from_device(void *buffer, hailo_device device, hailo_dma_buffer_direction_t direction) +hailo_status hailo_device_dma_unmap_buffer(hailo_device device, void *address, size_t size, hailo_dma_buffer_direction_t direction) { - CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(device); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(device)->dma_unmap(address, size, direction); +} - auto hailort_allocated_buffer = ExportedBufferManager::get_resource(buffer); - if (hailort_allocated_buffer) { - // TODO: mappings get dtor'd when the Buffer object is dtor'd. 
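
For orientation, a hedged usage sketch of the renamed mapping API above: the caller now hands the raw address, size, and a hailo_dma_buffer_direction_t directly to the device, and unmaps with the same triple. Here `device` is assumed to be a valid hailo_device obtained elsewhere, and error handling is schematic:

    #include "hailo/hailort.h"
    #include <cstdint>
    #include <vector>

    hailo_status map_use_unmap(hailo_device device, size_t frame_size)
    {
        std::vector<uint8_t> frame(frame_size);

        // Map the user buffer once, before the transfer loop
        hailo_status status = hailo_device_dma_map_buffer(device, frame.data(), frame.size(),
            HAILO_DMA_BUFFER_DIRECTION_H2D);
        if (HAILO_SUCCESS != status) {
            return status;
        }

        // ... async stream writes using frame.data() would go here ...

        // Unmap with the same address/size/direction used for mapping
        return hailo_device_dma_unmap_buffer(device, frame.data(), frame.size(),
            HAILO_DMA_BUFFER_DIRECTION_H2D);
    }
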
- // We want all the mapping to be held in one place for hailort::Buffers and for user alloacted buffers - // so this will change (HRT-10983) - return HAILO_SUCCESS; - } +hailo_status hailo_vdevice_dma_map_buffer(hailo_vdevice vdevice,void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + CHECK_ARG_NOT_NULL(vdevice); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(vdevice)->dma_map(address, size, direction); +} - // The buffer has been allocated by the user - return reinterpret_cast(device)->dma_unmap(buffer, - (HAILO_DMA_BUFFER_DIRECTION_H2D == direction) ? HAILO_H2D_STREAM : HAILO_D2H_STREAM); +hailo_status hailo_vdevice_dma_unmap_buffer(hailo_vdevice vdevice, void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + CHECK_ARG_NOT_NULL(vdevice); + CHECK_ARG_NOT_NULL(address); + return reinterpret_cast(vdevice)->dma_unmap(address, size, direction); } hailo_status hailo_calculate_eth_input_rate_limits(hailo_hef hef, const char *network_group_name, uint32_t fps, @@ -1318,20 +1309,7 @@ hailo_status hailo_stream_read_raw_buffer_async(hailo_output_stream stream, void CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(callback); - auto buffer_ref = ExportedBufferManager::get_resource(buffer); - if (HAILO_NOT_FOUND == buffer_ref.status()) { - // User addr (buffer hasn't been allocated by hailo_allocate_buffer) - return (reinterpret_cast(stream))->read_async(buffer, size, - wrap_c_user_callback(callback, opaque)); - } - - // buffer has been allocated by hailo_allocate_buffer - CHECK_EXPECTED_AS_STATUS(buffer_ref); - auto buffer_ptr = buffer_ref->get(); - assert(buffer_ptr != nullptr); - CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); - - return (reinterpret_cast(stream))->read_async(buffer_ptr, + return (reinterpret_cast(stream))->read_async(buffer, size, wrap_c_user_callback(callback, opaque)); } @@ -1342,20 +1320,7 @@ hailo_status hailo_stream_write_raw_buffer_async(hailo_input_stream stream, cons CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(callback); - auto buffer_ref = ExportedBufferManager::get_resource(const_cast(buffer)); - if (HAILO_NOT_FOUND == buffer_ref.status()) { - // User addr (buffer hasn't been allocated by hailo_allocate_buffer) - return (reinterpret_cast(stream))->write_async(buffer, size, - wrap_c_user_callback(callback, opaque)); - } - - // buffer has been allocated by hailo_allocate_buffer - CHECK_EXPECTED_AS_STATUS(buffer_ref); - auto buffer_ptr = buffer_ref->get(); - assert(buffer_ptr != nullptr); - CHECK(size == buffer_ptr->size(), HAILO_INVALID_ARGUMENT); - - return (reinterpret_cast(stream))->write_async(buffer_ptr, + return (reinterpret_cast(stream))->write_async(buffer, size, wrap_c_user_callback(callback, opaque)); } @@ -2222,16 +2187,24 @@ hailo_status hailo_vstream_write_raw_buffer(hailo_input_vstream input_vstream, c CHECK_ARG_NOT_NULL(buffer); auto status = reinterpret_cast(input_vstream)->write(MemoryView::create_const(buffer, buffer_size)); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); return HAILO_SUCCESS; } hailo_status hailo_vstream_write_pix_buffer(hailo_input_vstream input_vstream, const hailo_pix_buffer_t *buffer) { + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer->memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); + CHECK_ARG_NOT_NULL(input_vstream); CHECK_ARG_NOT_NULL(buffer); auto status = reinterpret_cast(input_vstream)->write(*buffer); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); 
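
The vstream hunks here stop logging HAILO_STREAM_ABORT as an error and simply propagate it, since an abort is the expected way to unblock pending transfers during deactivation or shutdown. A sketch of the calling pattern this enables (vstream, frame, and keep_running are placeholders, not names from the patch):

    #include "hailo/hailort.h"
    #include <atomic>
    #include <cstdint>
    #include <vector>

    hailo_status write_loop(hailo_input_vstream vstream, const std::vector<uint8_t> &frame,
        std::atomic<bool> &keep_running)
    {
        while (keep_running) {
            auto status = hailo_vstream_write_raw_buffer(vstream, frame.data(), frame.size());
            if (HAILO_STREAM_ABORT == status) {
                break;  // stream aborted (e.g. deactivation) - a graceful stop, not a failure
            }
            if (HAILO_SUCCESS != status) {
                return status;  // a real error
            }
        }
        return HAILO_SUCCESS;
    }
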
return HAILO_SUCCESS; } @@ -2240,8 +2213,11 @@ hailo_status hailo_vstream_read_raw_buffer(hailo_output_vstream output_vstream, { CHECK_ARG_NOT_NULL(output_vstream); CHECK_ARG_NOT_NULL(dst); - + auto status = reinterpret_cast(output_vstream)->read(MemoryView(dst, dst_size)); + if (HAILO_STREAM_ABORT == status) { + return status; + } CHECK_SUCCESS(status); return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp index b060466f..db6e9c2a 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.cpp +++ b/hailort/libhailort/src/hef/context_switch_actions.cpp @@ -10,6 +10,7 @@ #include "context_switch_actions.hpp" #include "core_op/resource_manager/resource_manager.hpp" +#include "hef/hef_internal.hpp" #include "context_switch_defs.h" @@ -171,22 +172,60 @@ Expected DeactivateConfigChannelAction::serialize_params(const ContextRe return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); } -Expected WriteDataCcwAction::create( +Expected WriteDataCcwActionByBuffer::create( Buffer &&data, uint8_t config_stream_index, size_t total_ccw_burst) { CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_ccw_burst), HAILO_INVALID_HEF, "Too many ccw burst {} (must fit in uint16)", total_ccw_burst); - auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction( + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwActionByBuffer( std::move(data), config_stream_index, static_cast(total_ccw_burst))); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } -WriteDataCcwAction::WriteDataCcwAction(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst) : +hailo_status WriteDataCcwActionByBuffer::write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) +{ + bool is_last_write = config_buffer.size_left() == size(); + if (should_support_pre_fetch && is_last_write) { + auto status = config_buffer.pad_with_nops(); + CHECK_SUCCESS(status); + } + + auto status = config_buffer.write(MemoryView(m_data)); + CHECK_SUCCESS(status); + + if (should_support_pre_fetch && is_last_write) { + auto desc_count = config_buffer.program_descriptors(); + CHECK_EXPECTED_AS_STATUS(desc_count); + } + + return HAILO_SUCCESS; +} + +Expected WriteDataCcwAction::create(uint32_t offset, size_t size, uint8_t config_stream_index, + size_t total_ccw_burst, std::shared_ptr shef_file_handle) +{ + CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_ccw_burst), HAILO_INVALID_HEF, + "Too many ccw burst {} (must fit in uint16)", total_ccw_burst); + auto result = ContextSwitchConfigActionPtr(new (std::nothrow) WriteDataCcwAction( + offset, size, config_stream_index, static_cast(total_ccw_burst), shef_file_handle)); + CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); + return result; +} + +WriteDataCcwActionByBuffer::WriteDataCcwActionByBuffer(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst) : + WriteDataCcwAction(0, 0, config_stream_index, total_ccw_burst, nullptr), + m_data(std::move(data)) +{} + +WriteDataCcwAction::WriteDataCcwAction(uint32_t offset, size_t size, uint8_t config_stream_index, uint16_t total_ccw_burst, + std::shared_ptr shef_file_handle) : ContextSwitchConfigAction(Type::WriteDataCcw), - m_data(std::move(data)), + m_offset(offset), + m_size(size), m_config_stream_index(config_stream_index), - m_total_ccw_burst(total_ccw_burst) + m_total_ccw_burst(total_ccw_burst), + m_shef_file_handle(shef_file_handle) {} Expected> 
WriteDataCcwAction::serialize(const ContextResources &) const @@ -207,6 +246,24 @@ Expected WriteDataCcwAction::serialize_params(const ContextResources &) return make_unexpected(HAILO_NOT_IMPLEMENTED); } +hailo_status WriteDataCcwAction::write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) +{ + bool is_last_write = config_buffer.size_left() == size(); + + auto buffer = m_shef_file_handle->read(m_offset, m_size); + CHECK_EXPECTED_AS_STATUS(buffer); + + auto status = config_buffer.write(MemoryView(buffer.value())); + CHECK_SUCCESS(status); + + if (should_support_pre_fetch && is_last_write) { + auto desc_count = config_buffer.program_descriptors(); + CHECK_EXPECTED_AS_STATUS(desc_count); + } + + return HAILO_SUCCESS; +} + Expected AddCcwBurstAction::create(uint8_t config_stream_index, uint16_t ccw_bursts) { auto result = ContextSwitchConfigActionPtr(new (std::nothrow) AddCcwBurstAction(config_stream_index, ccw_bursts)); diff --git a/hailort/libhailort/src/hef/context_switch_actions.hpp b/hailort/libhailort/src/hef/context_switch_actions.hpp index 63170949..31ab7d4b 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.hpp +++ b/hailort/libhailort/src/hef/context_switch_actions.hpp @@ -19,6 +19,7 @@ #include "device_common/control_protocol.hpp" #include "context_switch_defs.h" +#include "core_op/resource_manager/config_buffer.hpp" namespace hailort @@ -155,11 +156,12 @@ class DeactivateConfigChannelAction : public ContextSwitchConfigAction const vdma::ChannelId m_channel_id; }; +class ShefFileHandle; class WriteDataCcwAction : public ContextSwitchConfigAction { public: - static Expected create(Buffer &&data, uint8_t config_stream_index, - size_t total_ccw_burst); + static Expected create(uint32_t offset, size_t size, uint8_t config_stream_index, + size_t total_ccw_burst, std::shared_ptr shef_file_handle); WriteDataCcwAction(WriteDataCcwAction &&) = default; WriteDataCcwAction(const WriteDataCcwAction &) = delete; WriteDataCcwAction &operator=(WriteDataCcwAction &&) = delete; @@ -170,17 +172,41 @@ class WriteDataCcwAction : public ContextSwitchConfigAction virtual bool supports_repeated_block() const override; virtual Expected serialize_params(const ContextResources &context_resources) const override; - const MemoryView data() const { return MemoryView::create_const(m_data.data(), m_data.size()); } uint8_t config_stream_index() const { return m_config_stream_index; } uint16_t total_ccw_burst() const { return m_total_ccw_burst; } + virtual size_t size() const { return m_size; } + virtual hailo_status write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch); + +protected: + WriteDataCcwAction(uint32_t offset, size_t size, uint8_t config_stream_index, + uint16_t total_ccw_burst, std::shared_ptr shef_file_handle); + + uint32_t m_offset; + size_t m_size; + const uint8_t m_config_stream_index; + const uint16_t m_total_ccw_burst; + std::shared_ptr m_shef_file_handle; +}; + +class WriteDataCcwActionByBuffer : public WriteDataCcwAction +{ +public: + static Expected create(Buffer &&data, uint8_t config_stream_index, + size_t total_ccw_burst); + WriteDataCcwActionByBuffer(WriteDataCcwActionByBuffer &&) = default; + WriteDataCcwActionByBuffer(const WriteDataCcwActionByBuffer &) = delete; + WriteDataCcwActionByBuffer &operator=(WriteDataCcwActionByBuffer &&) = delete; + WriteDataCcwActionByBuffer &operator=(const WriteDataCcwActionByBuffer &) = delete; + virtual ~WriteDataCcwActionByBuffer() = default; + + virtual size_t size() const override 
{ return m_data.size(); } + virtual hailo_status write_to_config_buffer(ConfigBuffer& config_buffer, bool should_support_pre_fetch) override; private: - WriteDataCcwAction(Buffer &&data, uint8_t config_stream_index, + WriteDataCcwActionByBuffer(Buffer &&data, uint8_t config_stream_index, uint16_t total_ccw_burst); Buffer m_data; - const uint8_t m_config_stream_index; - const uint16_t m_total_ccw_burst; }; class AddCcwBurstAction : public ContextSwitchConfigAction diff --git a/hailort/libhailort/src/hef/core_op_metadata.cpp b/hailort/libhailort/src/hef/core_op_metadata.cpp index c600123d..b700dd8a 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.cpp +++ b/hailort/libhailort/src/hef/core_op_metadata.cpp @@ -8,7 +8,6 @@ **/ #include "core_op_metadata.hpp" -#include "hef_internal.hpp" #include namespace hailort @@ -52,9 +51,10 @@ static bool is_edge_under_mux(const LayerInfo &info, const std::string &edge_nam } ContextMetadata::ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info) : + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found) : m_actions(std::move(actions)), - m_config_buffers_info(std::move(config_buffers_info)) + m_config_buffers_info(std::move(config_buffers_info)), + m_const_input_layer_found(const_input_layer_found) {} const ConfigBufferInfoMap &ContextMetadata::config_buffers_info() const @@ -67,6 +67,11 @@ const std::vector &ContextMetadata::get_actions() return m_actions; } +bool ContextMetadata::const_input_layer_found() const +{ + return m_const_input_layer_found; +} + std::vector ContextMetadata::get_actions_of_type( const std::set &action_types) const { @@ -183,12 +188,15 @@ CoreOpMetadata::CoreOpMetadata(const std::string &core_op_name, std::vector &&dynamic_contexts, std::vector &&config_channels_info, SupportedFeatures &supported_features, - std::vector sorted_network_names) + std::vector sorted_network_names, + bool can_fast_batch_switch) : m_preliminary_context(std::move(preliminary_context)), m_dynamic_contexts(std::move(dynamic_contexts)), m_config_channels_info(std::move(config_channels_info)), m_core_op_name(core_op_name), m_supported_features(supported_features), - m_sorted_network_names(sorted_network_names) {} + m_sorted_network_names(sorted_network_names), + m_can_fast_batch_switch(can_fast_batch_switch) + {} std::vector CoreOpMetadata::get_input_layer_infos() const { @@ -375,30 +383,83 @@ Expected NetworkGroupMetadata::create(const std::string &n SupportedFeatures &supported_features, const std::vector &sorted_network_names, std::vector &ops_metadata) { - auto all_layers_infos = get_all_layer_infos(core_ops_metadata_per_arch); - CHECK_EXPECTED(all_layers_infos); + return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, + supported_features, sorted_network_names, ops_metadata); +} + +Expected NetworkGroupMetadata::get_core_op_metadata() const +/* This function is used for names getters (such as get_vstream_names_from_stream_name), + so should be same across all clusters layouts */ +{ + CHECK_AS_EXPECTED(1 == m_core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE); + auto core_op_metadata_exp = m_core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); + CHECK_EXPECTED(core_op_metadata_exp); + + auto core_op_metadata = core_op_metadata_exp.release(); + return core_op_metadata; +} + +Expected> NetworkGroupMetadata::get_all_layer_infos() const +{ + auto core_op_metadata = get_core_op_metadata(); + 
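
Backing up to the WriteDataCcwAction split above: the base class now streams CCW payloads from the HEF file on demand (offset plus size, through ShefFileHandle), while WriteDataCcwActionByBuffer keeps serving the pre-loaded Buffer. A simplified standalone sketch of that shape, with stand-in types rather than the real HailoRT classes:

    #include <cstdint>
    #include <fstream>
    #include <vector>

    // Stand-in for the two CCW data sources: file-backed (lazy read) vs in-memory.
    struct CcwSource {
        virtual ~CcwSource() = default;
        virtual size_t size() const = 0;
        virtual std::vector<uint8_t> fetch() = 0;  // called when filling the config buffer
    };

    struct FileBackedCcw : CcwSource {  // analogous to WriteDataCcwAction
        std::ifstream &file;
        uint32_t offset;
        size_t len;
        FileBackedCcw(std::ifstream &f, uint32_t off, size_t n) : file(f), offset(off), len(n) {}
        size_t size() const override { return len; }
        std::vector<uint8_t> fetch() override {
            std::vector<uint8_t> buf(len);
            file.seekg(offset);  // the real code also adds the CCW-section base offset
            file.read(reinterpret_cast<char *>(buf.data()), len);
            return buf;
        }
    };

    struct MemoryBackedCcw : CcwSource {  // analogous to WriteDataCcwActionByBuffer
        std::vector<uint8_t> data;
        explicit MemoryBackedCcw(std::vector<uint8_t> &&d) : data(std::move(d)) {}
        size_t size() const override { return data.size(); }
        std::vector<uint8_t> fetch() override { return data; }
    };

The payoff is that write_to_config_buffer stays structurally identical in both paths: it asks size() up front to detect the last write (padding with NOPs and programming descriptors when pre-fetch is supported), and only pulls the bytes when they are actually written.
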
CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_all_layer_infos(); +} + +Expected> NetworkGroupMetadata::get_input_layer_infos(const std::string &network_name) const +{ + auto core_op_metadata = get_core_op_metadata(); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_input_layer_infos(network_name); +} + +Expected> NetworkGroupMetadata::get_output_layer_infos(const std::string &network_name) const +{ + auto core_op_metadata = get_core_op_metadata(); + CHECK_EXPECTED(core_op_metadata); + + return core_op_metadata.value()->get_output_layer_infos(network_name); +} + +Expected> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const +{ + auto input_layer_infos = get_input_layer_infos(network_name); + CHECK_EXPECTED(input_layer_infos); std::vector input_vstream_infos; + for (auto &layer_info : input_layer_infos.value()) { + auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); + input_vstream_infos.insert(input_vstream_infos.end(), + std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end())); + } + CHECK_AS_EXPECTED(0 != input_vstream_infos.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name); + + return input_vstream_infos; +} + +Expected> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const +{ + auto output_layer_infos = get_output_layer_infos(network_name); + CHECK_EXPECTED(output_layer_infos); std::vector output_vstream_infos; - for (auto &layer_info : all_layers_infos.value()) { - if (std::any_of(ops_metadata.begin(), ops_metadata.end(), + for (auto &layer_info : output_layer_infos.value()) { + if (std::any_of(m_ops_metadata.begin(), m_ops_metadata.end(), [&layer_info](auto &op_metadata) { return contains(op_metadata->get_input_names(), layer_info.name); })) { continue; // all output_vstream_infos that relates to the op are coming from the op itself instead of layer_infos } + auto vstreams_info = LayerInfoUtils::get_vstream_infos_from_layer_info(layer_info); - if (HAILO_D2H_STREAM == layer_info.direction) { - // In case of fused nms layers, several LayerInfos will contain data about the same fused layer - for (auto &vstream_info : vstreams_info) { - if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) { - output_vstream_infos.push_back(vstream_info); - } + // In case of fused nms layers, several LayerInfos will contain data about the same fused layer + for (auto &vstream_info : vstreams_info) { + if (!LayerInfoUtils::vstream_info_already_in_vector(output_vstream_infos, vstream_info.name)) { + output_vstream_infos.push_back(vstream_info); } - } else { - input_vstream_infos.insert(input_vstream_infos.end(), - std::make_move_iterator(vstreams_info.begin()), std::make_move_iterator(vstreams_info.end())); } } - for (auto &metadata : ops_metadata) { + for (auto &metadata : m_ops_metadata) { auto vstream_info = metadata->get_output_vstream_info(); CHECK_EXPECTED(vstream_info); output_vstream_infos.push_back(vstream_info.release()); @@ -407,18 +468,18 @@ Expected NetworkGroupMetadata::create(const std::string &n // Sort vstream infos by sorted_output_names hailo_status status = HAILO_SUCCESS; std::sort(output_vstream_infos.begin(), output_vstream_infos.end(), - [&sorted_output_names, &status](const auto &info1, const auto &info2) + [this, &status](const auto &info1, const auto &info2) { - const auto index1 = std::find(sorted_output_names.begin(), 
sorted_output_names.end(), std::string(info1.name)); - const auto index2 = std::find(sorted_output_names.begin(), sorted_output_names.end(), std::string(info2.name)); + const auto index1 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info1.name)); + const auto index2 = std::find(m_sorted_output_names.begin(), m_sorted_output_names.end(), std::string(info2.name)); - if (sorted_output_names.end() == index1) { + if (m_sorted_output_names.end() == index1) { LOGGER__ERROR("VStream {} not found in sorted output names", info1.name); status = HAILO_INTERNAL_FAILURE; return false; } - if (sorted_output_names.end() == index2) { + if (m_sorted_output_names.end() == index2) { LOGGER__ERROR("VStream {} not found in sorted output names", info2.name); status = HAILO_INTERNAL_FAILURE; return false; @@ -428,34 +489,9 @@ Expected NetworkGroupMetadata::create(const std::string &n }); CHECK_SUCCESS_AS_EXPECTED(status); - return NetworkGroupMetadata(network_group_name, std::move(core_ops_metadata_per_arch), sorted_output_names, supported_features, sorted_network_names, - input_vstream_infos, output_vstream_infos, ops_metadata); -} - -Expected> NetworkGroupMetadata::get_input_vstream_infos(const std::string &network_name) const -{ - std::vector res; - for (auto &vstream_info : m_input_vstreams_infos) { - if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { - res.push_back(vstream_info); - } - } - CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); - - return res; -} - -Expected> NetworkGroupMetadata::get_output_vstream_infos(const std::string &network_name) const -{ - std::vector res; - for (auto &vstream_info : m_output_vstreams_infos) { - if ((network_name == std::string(vstream_info.network_name)) || (network_name.empty()) || (network_name == default_network_name())) { - res.push_back(vstream_info); - } - } - CHECK_AS_EXPECTED(0 != res.size(), HAILO_NOT_FOUND, "No VStreams where found for network {}", network_name); + CHECK_AS_EXPECTED(0 != output_vstream_infos.size(), HAILO_NOT_FOUND, "No VStreams were found for network {}", network_name); - return res; + return output_vstream_infos; } Expected> NetworkGroupMetadata::get_all_vstream_infos(const std::string &network_name) const @@ -486,7 +522,7 @@ Expected> NetworkGroupMetadata::get_vstream_names_from_ } } - auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + auto all_layers_infos = get_all_layer_infos(); CHECK_EXPECTED(all_layers_infos); for (auto &layer_info : all_layers_infos.release()) { if (layer_info.is_multi_planar) { @@ -521,7 +557,7 @@ Expected> NetworkGroupMetadata::get_stream_names_from_v } } - auto all_layers_infos = get_all_layer_infos(m_core_ops_metadata_per_arch); + auto all_layers_infos = get_all_layer_infos(); CHECK_EXPECTED(all_layers_infos); for (auto &layer_info : all_layers_infos.value()) { if (layer_info.is_mux) { diff --git a/hailort/libhailort/src/hef/core_op_metadata.hpp b/hailort/libhailort/src/hef/core_op_metadata.hpp index 73a3f4d0..d00ca896 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.hpp +++ b/hailort/libhailort/src/hef/core_op_metadata.hpp @@ -12,7 +12,7 @@ #include "hef/layer_info.hpp" #include "hef/context_switch_actions.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" namespace hailort @@ -31,6 +31,7 @@ struct SupportedFeatures { bool output_scale_by_feature = false; 
bool periph_calculation_in_hailort = false; bool core_hw_padding_config_in_dfc = false; + bool batch_register_config = false; }; // For each config_stream_index we store vector of all ccw write length. The vector is used to build the config buffer. @@ -40,7 +41,7 @@ using ConfigBufferInfoMap = std::unordered_map>; class ContextMetadata final { public: ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info); + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found); const std::vector &get_actions() const; std::vector get_actions_of_type( @@ -61,9 +62,12 @@ class ContextMetadata final { Expected get_layers_transfer_size(const std::vector &layer_infos) const; Expected get_context_transfer_size() const; + + bool const_input_layer_found() const; private: std::vector m_actions; ConfigBufferInfoMap m_config_buffers_info; + bool m_const_input_layer_found; std::vector m_boundary_input_layers; std::vector m_boundary_output_layers; @@ -84,7 +88,8 @@ class CoreOpMetadata final { std::vector &&dynamic_contexts, std::vector &&config_channels_info, SupportedFeatures &supported_features, - std::vector sorted_network_names); + std::vector sorted_network_names, + bool can_fast_batch_switch); std::vector get_input_layer_infos() const; std::vector get_output_layer_infos() const; @@ -125,6 +130,11 @@ class CoreOpMetadata final { return m_sorted_network_names; } + bool get_can_fast_batch_switch() const + { + return m_can_fast_batch_switch; + } + private: // TODO: Remove const std::string default_network_name() const @@ -138,6 +148,7 @@ class CoreOpMetadata final { std::string m_core_op_name; SupportedFeatures m_supported_features; std::vector m_sorted_network_names; + bool m_can_fast_batch_switch; }; using CoreOpMetadataPtr = std::shared_ptr; @@ -168,15 +179,11 @@ class NetworkGroupMetadata final { std::vector &sorted_output_names, SupportedFeatures &supported_features, const std::vector &sorted_network_names, - std::vector &input_vstreams_infos, - std::vector &output_vstreams_infos, std::vector &ops_metadata) : m_network_group_name(network_group_name), m_sorted_output_names(sorted_output_names), m_supported_features(supported_features), m_sorted_network_names(sorted_network_names), - m_input_vstreams_infos(input_vstreams_infos), - m_output_vstreams_infos(output_vstreams_infos), m_core_ops_metadata_per_arch(std::move(core_ops_metadata_per_arch)), m_ops_metadata(ops_metadata) {}; @@ -216,25 +223,16 @@ class NetworkGroupMetadata final { } private: - static Expected> get_all_layer_infos(std::map &core_ops_metadata_per_arch) - /* This function is used for names getters (such as get_vstream_names_from_stream_name), - so should be same across all clusters layouts */ - { - CHECK_AS_EXPECTED(1 == core_ops_metadata_per_arch.size(), HAILO_INTERNAL_FAILURE); - auto core_op_metadata = core_ops_metadata_per_arch.begin()->second.get_metadata(PARTIAL_CLUSTERS_LAYOUT_IGNORE); - CHECK_EXPECTED(core_op_metadata); - - return core_op_metadata.value()->get_all_layer_infos(); - } + Expected get_core_op_metadata() const; + Expected> get_all_layer_infos() const; + Expected> get_input_layer_infos(const std::string &network_name) const; + Expected> get_output_layer_infos(const std::string &network_name) const; std::string m_network_group_name; std::vector m_sorted_output_names; SupportedFeatures m_supported_features; std::vector m_sorted_network_names; - std::vector m_input_vstreams_infos; - std::vector m_output_vstreams_infos; - std::map m_core_ops_metadata_per_arch; // Key is core_op_name 
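
One more note on the core_op_metadata.cpp changes above: the output vstream infos are ordered by where each name appears in m_sorted_output_names, and any name missing from that list is reported as an internal failure. A minimal sketch of this sort-by-declared-order idiom on plain strings (simplified types, not the real hailo_vstream_info_t):

    #include <algorithm>
    #include <string>
    #include <vector>

    // Sorts `items` by the position of each entry in `order`; returns false if an
    // item is missing from `order` (mirrors the HAILO_INTERNAL_FAILURE path above).
    bool sort_by_reference_order(std::vector<std::string> &items,
        const std::vector<std::string> &order)
    {
        bool all_found = true;
        std::sort(items.begin(), items.end(),
            [&](const std::string &a, const std::string &b) {
                const auto ia = std::find(order.begin(), order.end(), a);
                const auto ib = std::find(order.begin(), order.end(), b);
                if ((order.end() == ia) || (order.end() == ib)) {
                    all_found = false;  // unknown name, reported after the sort
                    return false;
                }
                return ia < ib;
            });
        return all_found;
    }
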
std::vector m_ops_metadata; diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp index 7e959ec2..389af5be 100644 --- a/hailort/libhailort/src/hef/hef.cpp +++ b/hailort/libhailort/src/hef/hef.cpp @@ -24,6 +24,7 @@ #include "net_flow/ops/nms_post_process.hpp" #include "net_flow/ops/yolov5_post_process.hpp" +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" #include "net_flow/ops/yolox_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" #include "net_flow/ops/argmax_post_process.hpp" @@ -36,6 +37,8 @@ #include "eth/hcp_config_core_op.hpp" #include "hef/layer_info.hpp" #include "device_common/control.hpp" +#include "hw_consts.hpp" +#include "utils/profiler/tracer_macros.hpp" #include "byte_order.h" #include "context_switch_defs.h" @@ -49,6 +52,7 @@ #include #include #include +#include namespace hailort @@ -280,7 +284,7 @@ Expected Hef::get_bottleneck_fps(const std::string &net_group_name) c Expected Hef::get_hef_device_arch() const { - return DeviceBase::hef_arch_to_device_arch(pimpl->get_device_arch()); + return DeviceBase::hef_arch_to_device_arch(static_cast(pimpl->get_device_arch())); } Expected Hef::device_arch_to_string(const hailo_device_architecture_t arch) @@ -320,6 +324,37 @@ Expected> Hef::get_vstream_names_from_stream_name(const return pimpl->get_vstream_names_from_stream_name(stream_name, net_group_name_str); } +ShefFileHandle::ShefFileHandle(const std::string &hef_path, uint32_t ccws_buffer_offset) + : m_hef_path(hef_path), m_ccws_buffer_offset(ccws_buffer_offset) {} + +hailo_status ShefFileHandle::open() +{ + m_hef_file = std::ifstream(m_hef_path, std::ios::in | std::ios::binary); + CHECK(m_hef_file.is_open(), HAILO_OPEN_FILE_FAILURE, "Failed to open HEF file \"{}\". errno: {}", m_hef_path, errno); + return HAILO_SUCCESS; +} + +Expected ShefFileHandle::read(uint32_t offset, size_t size) +{ + auto buffer = Buffer::create(size); + CHECK_EXPECTED(buffer); + + m_hef_file.seekg(m_ccws_buffer_offset + offset, m_hef_file.beg); + CHECK_AS_EXPECTED(m_hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Seeking in file failed"); + + m_hef_file.read(reinterpret_cast(buffer->data()), size); + CHECK_AS_EXPECTED(m_hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Failed reading ccw"); + + return buffer; +} + +hailo_status ShefFileHandle::close() +{ + m_hef_file.close(); + CHECK(m_hef_file.good(), HAILO_CLOSE_FAILURE, "Closing file failed"); + return HAILO_SUCCESS; +} + Expected Hef::Impl::create(const std::string &hef_path) { hailo_status status = HAILO_UNINITIALIZED; @@ -358,7 +393,7 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 while (!s.eof()) { s.read(md5_buffer, HEF__MD5_BUFFER_SIZE); CHECK(!s.bad(), HAILO_FILE_OPERATION_FAILURE, "ifstream::read() failed"); - MD5_Update(&md5, &md5_buffer, static_cast(s.gcount())); + MD5_Update(&md5, &md5_buffer, s.gcount()); } MD5_Final(calculated_md5, &md5); @@ -369,14 +404,15 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 return HAILO_SUCCESS; } -hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM_t &calculated_md5, size_t proto_size) +hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM_t &calculated_md5, size_t hef_file_residue_size) { - CHECK(HEADER_MAGIC == BYTE_ORDER__htonl(header.magic), HAILO_INVALID_HEF, + CHECK(HEADER_MAGIC == header.magic, HAILO_INVALID_HEF, "HEF magic does not match. 
detected magic - {:x}", header.magic); - CHECK(HEADER_VERSION == BYTE_ORDER__htonl(header.version), HAILO_INVALID_HEF, "HEF version does not match"); + auto version = header.version; + CHECK((HEADER_VERSION_0 == version) || (HEADER_VERSION_1 == version), HAILO_INVALID_HEF, "HEF version does not match"); - CHECK(proto_size == BYTE_ORDER__htonl(header.hef_proto_length), HAILO_INVALID_HEF, + CHECK(hef_file_residue_size == header.hef_proto_size + header.ccws_size, HAILO_INVALID_HEF, "HEF file length does not match"); CHECK(0 == memcmp(&calculated_md5, &header.expected_md5, sizeof(MD5_SUM_t)), HAILO_INVALID_HEF, @@ -406,6 +442,13 @@ void Hef::Impl::init_md5(MD5_SUM_t &calculated_md5) memcpy(m_md5, calculated_md5, sizeof(m_md5)); } +void Hef::Impl::clear_hef_buffer() +{ +#ifdef HAILO_SUPPORT_MULTI_PROCESS + m_hef_buffer = Buffer(); +#endif // HAILO_SUPPORT_MULTI_PROCESS +} + hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) { #ifdef HAILO_SUPPORT_MULTI_PROCESS @@ -418,23 +461,36 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) CHECK(hef_file.is_open(), HAILO_OPEN_FILE_FAILURE, "Failed to open HEF file \"{}\". errno: {}", hef_path, errno); hef__header_t header = {}; - hef_file.read((char*)&header, sizeof(header)); + hef_file.read(reinterpret_cast(&header), sizeof(header)); CHECK(hef_file.good(), HAILO_FILE_OPERATION_FAILURE, "Failed reading HEF header"); - auto proto_size = get_istream_size(hef_file); - CHECK_EXPECTED_AS_STATUS(proto_size); + header.magic = BYTE_ORDER__htonl(header.magic); + header.version = BYTE_ORDER__htonl(header.version); + header.hef_proto_size = BYTE_ORDER__htonl(header.hef_proto_size); + header.ccws_size = BYTE_ORDER__htonl(header.ccws_size); + + auto hef_file_residue_size = get_istream_size(hef_file); + CHECK_EXPECTED_AS_STATUS(hef_file_residue_size); MD5_SUM_t calculated_md5 = {}; auto status = calc_istream_md5(hef_file, calculated_md5); CHECK_SUCCESS(status); - status = validate_hef_header(header, calculated_md5, proto_size.value()); + status = validate_hef_header(header, calculated_md5, hef_file_residue_size.value()); CHECK_SUCCESS(status); + if (HEADER_VERSION_1 == header.version) { + auto ptr = make_shared_nothrow(hef_path, + static_cast(sizeof(header) + header.hef_proto_size)); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + m_shef_file_handle = ptr; + } + init_md5(calculated_md5); ProtoHEFHef hef_message; - auto rb = hef_message.ParseFromIstream(&hef_file); + google::protobuf::io::IstreamInputStream zero_copy_input(&hef_file); + auto rb = hef_message.ParseFromBoundedZeroCopyStream(&zero_copy_input, header.hef_proto_size); CHECK(rb, HAILO_INVALID_HEF, "Failed parsing HEF file"); status = transfer_protobuf_field_ownership(hef_message); CHECK_SUCCESS(status); @@ -448,6 +504,7 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) status = validate_hef_extensions(); CHECK_SUCCESS(status); + TRACE(HefLoadedTrace, hef_path, m_header.sdk_version(), m_md5); return HAILO_SUCCESS; } @@ -460,7 +517,12 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) #endif // HAILO_SUPPORT_MULTI_PROCESS CHECK(hef_memview.size() >= sizeof(hef__header_t), HAILO_INVALID_HEF, "Invalid HEF header"); - const hef__header_t &header = reinterpret_cast(*hef_memview.data()); + const hef__header_t &raw_header = reinterpret_cast(*hef_memview.data()); + auto header = raw_header; + header.magic = BYTE_ORDER__htonl(header.magic); + header.version = BYTE_ORDER__htonl(header.version); + header.hef_proto_size = 
BYTE_ORDER__htonl(header.hef_proto_size); + header.ccws_size = BYTE_ORDER__htonl(header.ccws_size); auto proto_buffer = (hef_memview.data() + sizeof(header)); auto proto_size = (hef_memview.size() - sizeof(header)); @@ -664,19 +726,24 @@ static Expected> parse_config_channels_info(const Expected Hef::Impl::create_metadata_per_arch(const ProtoHEFCoreOpMock &core_op, const std::vector &sorted_network_names) { - auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features); + auto preliminary_context = HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features, m_shef_file_handle); CHECK_EXPECTED(preliminary_context); - auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch()); + auto dynamic_contexts = HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch(), m_shef_file_handle); CHECK_EXPECTED(dynamic_contexts); - auto config_channels_info = parse_config_channels_info(core_op); + auto config_channels_info = parse_config_channels_info(core_op); CHECK_EXPECTED(config_channels_info); + // If const input layer is found in the preliminary context, or first dynamic context we can't use fast batch switch + const auto can_fast_batch_switch = + !(preliminary_context.value().const_input_layer_found() || dynamic_contexts.value()[0].const_input_layer_found()); + // Currently, CoreOp name is the same as network_group_name, thats why we init it with it. // TODO: HRT-9551 - Change it when supporting multi core ops. auto metadata_per_arch = make_shared_nothrow(core_op.network_group_metadata.network_group_name(), - preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), m_supported_features, sorted_network_names); + preliminary_context.release(), dynamic_contexts.release(), config_channels_info.release(), + m_supported_features, sorted_network_names, can_fast_batch_switch); CHECK_NOT_NULL_AS_EXPECTED(metadata_per_arch, HAILO_OUT_OF_HOST_MEMORY); return metadata_per_arch; } @@ -792,7 +859,7 @@ const MemoryView Hef::Impl::get_hef_memview() } #endif // HAILO_SUPPORT_MULTI_PROCESS -Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) +Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) : m_shef_file_handle(nullptr) { status = HAILO_UNINITIALIZED; GOOGLE_PROTOBUF_VERIFY_VERSION; @@ -806,7 +873,7 @@ Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) status = HAILO_SUCCESS; } -Hef::Impl::Impl(const MemoryView &hef_memview, hailo_status &status) +Hef::Impl::Impl(const MemoryView &hef_memview, hailo_status &status) : m_shef_file_handle(nullptr) { status = HAILO_UNINITIALIZED; GOOGLE_PROTOBUF_VERIFY_VERSION; @@ -852,6 +919,8 @@ SupportedFeatures Hef::Impl::get_supported_features(const ProtoHEFHeader &header header, hef_extensions, included_features); supported_features.core_hw_padding_config_in_dfc = check_hef_optional_extension(ProtoHEFExtensionType::HW_PADDING, header, hef_optional_extensions); + supported_features.batch_register_config = check_hef_extension(ProtoHEFExtensionType::BATCH_REGISTER_CONFIG, + header, hef_extensions, included_features); return supported_features; } @@ -865,6 +934,7 @@ net_flow::NmsPostProcessConfig create_post_process_nms_config(const ProtoHEFOp & nms_config.number_of_classes = op_proto.nms_op().classes(); nms_config.background_removal = op_proto.nms_op().background_removal(); nms_config.background_removal_index = op_proto.nms_op().background_removal_index(); 
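Taken together, the header changes above give a version-1 (SHEF) file a simple on-disk layout: the fixed header, then the serialized protobuf, then the raw CCW section that ShefFileHandle reads lazily. A minimal sketch of the offset arithmetic, using only the hef__header_t fields shown in this diff (the helper name is hypothetical; the fields are stored big-endian, hence the BYTE_ORDER__htonl swaps in parse_hef_file):

// Illustrative sketch only - mirrors the ccws_buffer_offset passed to ShefFileHandle.
// v1 layout: [ header | protobuf (hef_proto_size bytes) | CCW section (ccws_size bytes) ]
static uint32_t shef_ccws_offset(const hef__header_t &header_in_host_order)
{
    // validate_hef_header() has already verified that hef_proto_size + ccws_size
    // accounts for every byte after the header, so this is where the CCWs start.
    return static_cast<uint32_t>(sizeof(hef__header_t) + header_in_host_order.hef_proto_size);
}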
@@ -865,6 +934,7 @@ net_flow::NmsPostProcessConfig create_post_process_nms_config(const ProtoHEFOp &
     nms_config.number_of_classes = op_proto.nms_op().classes();
     nms_config.background_removal = op_proto.nms_op().background_removal();
     nms_config.background_removal_index = op_proto.nms_op().background_removal_index();
+    nms_config.bbox_only = op_proto.nms_op().bbox_decoding_only();

     return nms_config;
 }
@@ -913,6 +983,21 @@ Expected<std::unordered_map<std::string, net_flow::BufferMetaData>> create_inputs_metadata(const
     return inputs_metadata;
 }

+uint32_t compute_num_of_proposals(const std::unordered_map<std::string, net_flow::BufferMetaData> &inputs_metadatas,
+    std::map<std::string, std::vector<int>> &anchors)
+{
+    uint32_t num_of_proposals = 0;
+    for (const auto &input_metadata_pair : inputs_metadatas) {
+        auto &name = input_metadata_pair.first;
+        auto &input_metadata = input_metadata_pair.second;
+        assert(contains(anchors, name));
+        auto &layer_anchors = anchors.at(name);
+        auto num_of_anchors = net_flow::YOLOv5PostProcessOp::get_num_of_anchors(layer_anchors);
+        num_of_proposals += static_cast<uint32_t>(num_of_anchors * input_metadata.shape.height * input_metadata.shape.width);
+    }
+    return num_of_proposals;
+}
+
 Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_op_metadata(const ProtoHEFOp &op_proto,
     const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
     const std::string &network_name)
@@ -929,13 +1014,41 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5,
+        nms_config.bbox_only);
+
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     return net_flow::Yolov5OpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolo_config.release(),
         network_name);
 }

+Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_bbox_only_op_metadata(const ProtoHEFOp &op_proto,
+    const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
+    const std::string &network_name)
+{
+    auto nms_config = create_post_process_nms_config(op_proto);
+
+    auto yolo_v5_config = create_yolov5_config(op_proto.nms_op().yolo_nms_op().bbox_decoders(),
+        op_proto.nms_op().yolo_nms_op().image_height(), op_proto.nms_op().yolo_nms_op().image_width(), pad_index_to_streams_info);
+    CHECK_EXPECTED(yolo_v5_config);
+
+    auto inputs_metadata = create_inputs_metadata(op_proto, pad_index_to_streams_info, input_to_output_pads);
+    CHECK_EXPECTED(inputs_metadata);
+
+    std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
+    net_flow::BufferMetaData output_metadata{};
+    uint32_t num_of_proposals = compute_num_of_proposals(inputs_metadata.value(), yolo_v5_config->anchors);
+    output_metadata.shape = {1, num_of_proposals, YOLOV5_BBOX_NUM_OF_VALUES + op_proto.nms_op().classes()};
+
+    output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV5, nms_config.bbox_only);
+    outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});
+
+    return net_flow::Yolov5BboxOnlyOpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolo_v5_config.release(),
+        network_name);
+}
+
 Expected<net_flow::PostProcessOpMetadataPtr> create_yolov5_seg_op_metadata(const ProtoHEFOp &op_proto,
     const std::map<size_t, LayerInfo> &pad_index_to_streams_info, const std::map<size_t, size_t> &input_to_output_pads,
     const std::string &network_name)
@@ -950,15 +1063,19 @@
     auto proto_layer_name = op_proto.nms_op().yolo_seg_op().proto_info().proto_layer();
     CHECK_AS_EXPECTED(contains(inputs_metadata.value(), proto_layer_name), HAILO_INVALID_HEF);

-    net_flow::YoloV5SegPostProcessConfig yolov5_seg_config =
-        {static_cast<float32_t>(op_proto.nms_op().yolo_seg_op().mask_threshold()),
-        op_proto.nms_op().yolo_seg_op().proto_info().proto_layer()};
+
+    const uint32_t SIZE_FACTOR = 2;
+    net_flow::YoloV5SegPostProcessConfig yolov5_seg_config = {};
+    yolov5_seg_config.mask_threshold = static_cast<float32_t>(op_proto.nms_op().yolo_seg_op().mask_threshold());
+    yolov5_seg_config.max_accumulated_mask_size = static_cast<uint32_t>(
+        yolov5_config->image_height * yolov5_config->image_width * SIZE_FACTOR);
+    yolov5_seg_config.proto_layer_name = proto_layer_name;

     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type
         ({ HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK, HAILO_FORMAT_FLAGS_NONE },
-        net_flow::OperationType::YOLOV5SEG);
+        net_flow::OperationType::YOLOV5SEG, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     return net_flow::Yolov5SegOpMetadata::create(inputs_metadata.release(), outputs_metadata, nms_config, yolov5_config.release(),
@@ -979,7 +1096,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV8);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOV8, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &bbox_proto : op_proto.nms_op().yolov8_nms_op().bbox_decoders()) {
@@ -1024,7 +1141,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOX);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::YOLOX, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (const auto &bbox_proto : op_proto.nms_op().yolox_nms_op().bbox_decoders()) {
@@ -1100,7 +1217,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::SSD);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, net_flow::OperationType::SSD, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &input_pad : op_proto.input_pads()) {
@@ -1168,7 +1285,7 @@
     std::unordered_map<std::string, net_flow::BufferMetaData> outputs_metadata;
     net_flow::BufferMetaData output_metadata{};
     output_metadata.format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(
-        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, op_type);
+        { HAILO_FORMAT_TYPE_AUTO, HAILO_FORMAT_ORDER_AUTO, HAILO_FORMAT_FLAGS_NONE }, op_type, nms_config.bbox_only);
     outputs_metadata.insert({op_proto.output_pads()[0].name(), output_metadata});

     for (auto &input_pad : op_proto.input_pads()) {
@@
-1239,7 +1356,8 @@ Expected> create_logits_op_metadata(const // TODO: HRT-10603 const auto &op_input_layer_info = pad_index_to_streams_info.at(output_pad_index); - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value( + DeviceBase::hef_arch_to_device_arch(static_cast(hef_arch))); CHECK_EXPECTED(max_periph_bytes_from_hef); // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size @@ -1303,11 +1421,19 @@ Expected> Hef::Impl::create_ops_ net_flow::PostProcessOpMetadataPtr post_process_op_metadata; switch (op_proto.nms_op().nms_op_case()) { case ProtoHEFNmsOp::kYoloNmsOp: { - auto expected_post_process_op_metadata = create_yolov5_op_metadata(op_proto, pad_index_to_streams_info, + if (op_proto.nms_op().bbox_decoding_only()) { + auto expected_post_process_op_metadata = create_yolov5_bbox_only_op_metadata(op_proto, pad_index_to_streams_info, + input_to_output_pads, network_name); + CHECK_EXPECTED(expected_post_process_op_metadata); + post_process_op_metadata = expected_post_process_op_metadata.release(); + break; + } else { + auto expected_post_process_op_metadata = create_yolov5_op_metadata(op_proto, pad_index_to_streams_info, input_to_output_pads, network_name); - CHECK_EXPECTED(expected_post_process_op_metadata); - post_process_op_metadata = expected_post_process_op_metadata.release(); - break; + CHECK_EXPECTED(expected_post_process_op_metadata); + post_process_op_metadata = expected_post_process_op_metadata.release(); + break; + } } case ProtoHEFNmsOp::kYoloxNmsOp: { auto expected_post_process_op_metadata = create_yolox_op_metadata(op_proto, pad_index_to_streams_info, @@ -1791,7 +1917,7 @@ static hailo_3d_image_shape_t parse_layer_hw_shape(const ProtoHEFEdgeLayerBase & hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + bool transposed, const uint16_t context_index, const uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer) { auto format_order_exp = HailoRTDefaults::get_device_format_order(base_info.format()); @@ -1826,7 +1952,8 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas CHECK_EXPECTED_AS_STATUS(type); layer_info.format.type = type.value(); - auto max_periph_bytes_from_hef = HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hef_arch)); + auto max_periph_bytes_from_hef = + HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(static_cast(hef_arch))); CHECK_EXPECTED_AS_STATUS(max_periph_bytes_from_hef); // TODO HRT-12099 - return invalid hef error when remove support for hefs with no max_shmifo size const auto max_periph_bytes = (0 == base_info.max_shmifo_size()) ? 
max_periph_bytes_from_hef.value() : @@ -1871,7 +1998,7 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas hailo_status HefUtils::fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer) { @@ -2005,7 +2132,7 @@ hailo_status HefUtils::fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, L hailo_status HefUtils::fill_mux_info(const ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { if (HAILO_MAX_STREAM_NAME_SIZE < (info.name().length() + 1)) { @@ -2083,7 +2210,7 @@ Expected convert_planes_format_to_hailo_format_order(const hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch) { auto layer_type = get_layer_type(edge_connection_type); @@ -2146,7 +2273,7 @@ hailo_status HefUtils::fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, hailo_status HefUtils::fill_boundary_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata, @@ -2154,7 +2281,7 @@ hailo_status HefUtils::fill_boundary_layers_info( { auto layer_info = get_boundary_layer_info(core_op, context_index, layer, supported_features, hef_arch); CHECK_EXPECTED_AS_STATUS(layer_info); - + context_metadata.add_boundary_layer(layer_info.release()); return HAILO_SUCCESS; @@ -2162,7 +2289,7 @@ hailo_status HefUtils::fill_boundary_layers_info( hailo_status HefUtils::fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata) @@ -2176,7 +2303,7 @@ hailo_status HefUtils::fill_inter_context_layers_info( hailo_status HefUtils::fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata) @@ -2191,7 +2318,7 @@ hailo_status HefUtils::fill_ddr_layers_info( hailo_status HefUtils::check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, - const uint8_t 
context_index)
+    const uint16_t context_index)
 {
     CHECK(context_ddr_input_layers.size() == context_ddr_output_layers.size(), HAILO_INVALID_HEF,
         "DDR pairs must be equal in size for context {}", context_index);
@@ -2310,7 +2437,7 @@ static std::pair<uint32_t, uint32_t> old_hef_parse_initial_l3(uint32_t initial_l3
 }

 static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction &proto_action,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, bool &const_input_layer_found)
 {
     switch (proto_action.action_case()) {
     case ProtoHEFAction::kDisableLcu:
@@ -2328,6 +2455,8 @@
             "Failed to parse HEF. Invalid lcu_index: {}.", proto_action.enable_lcu().lcu_index());
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(proto_action.enable_lcu().lcu_kernel_done_address()), HAILO_INVALID_HEF,
             "Failed to parse HEF. Invalid lcu_kernel_done_address: {}.", proto_action.enable_lcu().lcu_kernel_done_address());
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(proto_action.enable_lcu().lcu_kernel_done_count()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid lcu_kernel_done_count: {}.", proto_action.enable_lcu().lcu_kernel_done_count());

         auto support_multi_networks = supported_features.multi_network_support;
         auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.enable_lcu().network_index() : 0);
@@ -2340,6 +2469,24 @@
         return EnableLcuAction::create(cluster_index, lcu_index, network_index, kernel_done_address,
             kernel_done_count);
     }
+    case ProtoHEFAction::kSwitchLcuBatch:
+    {
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.switch_lcu_batch().cluster_index()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid cluster_index: {}.", proto_action.switch_lcu_batch().cluster_index());
+        CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.switch_lcu_batch().lcu_index()), HAILO_INVALID_HEF,
+            "Failed to parse HEF. Invalid lcu_index: {}.", proto_action.switch_lcu_batch().lcu_index());
+
+        auto support_multi_networks = supported_features.multi_network_support;
+        auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.switch_lcu_batch().network_index() : 0);
+
+        const auto cluster_index = static_cast<uint8_t>(proto_action.switch_lcu_batch().cluster_index());
+        const auto lcu_index = static_cast<uint8_t>(proto_action.switch_lcu_batch().lcu_index());
+        // The kernel_done_count field isn't used, but is required for legacy reasons.
+        const auto NULL_KERNEL_DONE_COUNT = (uint32_t)0;
+
+        return SwitchLcuBatchAction::create(cluster_index, lcu_index, network_index,
+            NULL_KERNEL_DONE_COUNT);
+    }
     case ProtoHEFAction::kEnableSequencer:
     {
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(proto_action.enable_sequencer().cluster_index()), HAILO_INVALID_HEF,
@@ -2465,6 +2612,11 @@ static Expected<ContextSwitchConfigActionPtr> parse_action(const ProtoHEFAction
         const auto network_index = static_cast<uint8_t>((support_multi_networks) ? proto_action.write_data_by_type().network_index() : 0);
         const auto shift = static_cast<uint8_t>(proto_action.write_data_by_type().shift());

+        // If data_type is BATCH_SIZE, fast batch switch can't be used
+        if (ProtoHEFWriteDataType::BATCH_SIZE == data_type) {
+            const_input_layer_found = true;
+        }
+
         return WriteDataByTypeAction::create(address, data_type, data, shift, mask, network_index);
     }
     default:
@@ -2521,9 +2673,46 @@ static hailo_status merge_write_ccw_actions(
         config_buffer_infos[config_stream_index].emplace_back(static_cast<uint32_t>(config_buffer->size()));

         const size_t total_ccw_burst = ccw_buffers.size();
-        auto action = WriteDataCcwAction::create(config_buffer.release(), config_stream_index, total_ccw_burst);
+        auto action = WriteDataCcwActionByBuffer::create(config_buffer.release(), config_stream_index, total_ccw_burst);
+        CHECK_EXPECTED_AS_STATUS(action);
+
+        actions.emplace_back(action.release());
+    }
+
+    return HAILO_SUCCESS;
+}
+
+static hailo_status build_write_ccw_actions(
+    std::vector<ContextSwitchConfigActionPtr> &actions,
+    ConfigBufferInfoMap &config_buffer_infos,
+    const std::vector<const ProtoHEFActionWriteDataCcw*> &write_ccw_actions,
+    std::shared_ptr<ShefFileHandle> shef_file_handle)
+{
+    std::unordered_map<uint8_t, size_t> ccws_per_config_index;
+    for (const auto *write_ccw_action : write_ccw_actions) {
+        CHECK(IS_FIT_IN_UINT8(write_ccw_action->cfg_channel_index()), HAILO_INVALID_HEF,
+            "Invalid cfg channel index");
+        const auto config_stream_index = static_cast<uint8_t>(write_ccw_action->cfg_channel_index());
+        if (ccws_per_config_index.find(config_stream_index) == ccws_per_config_index.end()) {
+            ccws_per_config_index[config_stream_index] = 0;
+        }
+        ccws_per_config_index[config_stream_index]++;
+    }
+    for (const auto *write_ccw_action : write_ccw_actions) {
+        if (write_ccw_action->data().size() == 0) {
+            continue;
+        }
+        const shef__ccw_offset_t *ccw_offset = reinterpret_cast<const shef__ccw_offset_t*>(write_ccw_action->data().data());
+        const auto config_stream_index = static_cast<uint8_t>(write_ccw_action->cfg_channel_index());
+
+        assert(BYTE_ORDER__htonl(ccw_offset->size) < std::numeric_limits<uint32_t>::max());
+        config_buffer_infos[config_stream_index].emplace_back(static_cast<uint32_t>(BYTE_ORDER__htonl(ccw_offset->size)));
+
+        const size_t total_ccw_burst = ccws_per_config_index[config_stream_index];
+        auto action = WriteDataCcwAction::create(BYTE_ORDER__htonl(ccw_offset->offset), BYTE_ORDER__htonl(ccw_offset->size),
+            config_stream_index, total_ccw_burst, shef_file_handle);
         CHECK_EXPECTED_AS_STATUS(action);
+
         actions.emplace_back(action.release());
     }
@@ -2533,7 +2722,9 @@
 static hailo_status parse_operation(std::vector<ContextSwitchConfigActionPtr> &actions,
     ConfigBufferInfoMap &config_buffer_infos,
     const ProtoHEFOperation &operation_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features,
+    std::shared_ptr<ShefFileHandle> shef_file_handle,
+    bool &const_input_layer_found)
 {
     auto trigger_action = parse_trigger_action(operation_proto.trigger());
     CHECK_EXPECTED_AS_STATUS(trigger_action);
@@ -2554,12 +2745,17 @@
             (next_action_index == operation_proto.actions_size()) ||
             (operation_proto.actions(next_action_index).action_case() != ProtoHEFAction::kWriteDataCcw);
         if (is_last_ccw) {
-            auto status = merge_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions);
-            CHECK_SUCCESS(status);
+            if (nullptr != shef_file_handle) {
+                auto status = build_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions, shef_file_handle);
+                CHECK_SUCCESS(status);
+            } else {
+                auto status = merge_write_ccw_actions(actions, config_buffer_infos, current_write_ccw_actions);
+                CHECK_SUCCESS(status);
+            }
             current_write_ccw_actions.clear();
         }
     } else {
-        auto action = parse_action(proto_action, supported_features);
+        auto action = parse_action(proto_action, supported_features, const_input_layer_found);
         CHECK_EXPECTED_AS_STATUS(action);
         actions.emplace_back(action.release());
     }
@@ -2571,30 +2767,32 @@
 static Expected<ContextMetadata> parse_operations(
     const google::protobuf::RepeatedPtrField<ProtoHEFOperation> &operations_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
     std::vector<ContextSwitchConfigActionPtr> actions;
     ConfigBufferInfoMap config_buffer_infos;
+    bool const_input_layer_found = false;

     for (const auto &operation_proto : operations_proto) {
-        auto status = parse_operation(actions, config_buffer_infos, operation_proto, supported_features);
+        auto status = parse_operation(actions, config_buffer_infos, operation_proto, supported_features, shef_file_handle,
+            const_input_layer_found);
         CHECK_SUCCESS_AS_EXPECTED(status);
     }

-    return ContextMetadata(std::move(actions), std::move(config_buffer_infos));
+    return ContextMetadata(std::move(actions), std::move(config_buffer_infos), const_input_layer_found);
 }

 Expected<ContextMetadata> HefUtils::parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto,
-    const SupportedFeatures &supported_features)
+    const SupportedFeatures &supported_features, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
-    return parse_operations(preliminary_proto.operation(), supported_features);
+    return parse_operations(preliminary_proto.operation(), supported_features, shef_file_handle);
 }

 Expected<ContextMetadata> HefUtils::parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op,
-    const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features,
-    const ProtoHEFHwArch &hef_arch)
+    const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features,
+    const ProtoHEFHwArch &hef_arch, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
-    auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features);
+    auto context_metadata_exp = parse_operations(context_proto.operations(), supported_features, shef_file_handle);
     CHECK_EXPECTED(context_metadata_exp);

     ContextMetadata context_metadata = context_metadata_exp.release();
@@ -2644,12 +2842,13 @@ static hailo_status validate_unique_boundary_names(const std::vector
 Expected<std::vector<ContextMetadata>> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features,
-    const ProtoHEFHwArch &hef_arch)
+    const ProtoHEFHwArch &hef_arch, std::shared_ptr<ShefFileHandle> shef_file_handle)
 {
     std::vector<ContextMetadata> contexts_metadata;
-    for (uint8_t context_index = 0; context_index < core_op.contexts.size(); context_index++) {
+    for (uint16_t context_index = 0; context_index < core_op.contexts.size(); context_index++) {
         auto &context_proto = core_op.contexts[context_index];
-        auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features, hef_arch);
+        auto context_metadata = parse_single_dynamic_context(core_op, context_proto, context_index, supported_features,
+            hef_arch, shef_file_handle);
         CHECK_EXPECTED(context_metadata);
         contexts_metadata.emplace_back(context_metadata.release());
     }
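Under the SHEF path above, each kWriteDataCcw payload carries a big-endian shef__ccw_offset_t record instead of the CCW bytes themselves. A small decoding sketch under that assumption (the helper name is hypothetical; it mirrors what build_write_ccw_actions does):

// Decodes one (offset, size) record the same way build_write_ccw_actions does.
// action_data is the protobuf bytes field of a kWriteDataCcw action.
static std::pair<uint32_t, uint32_t> decode_ccw_record(const std::string &action_data)
{
    const auto *record = reinterpret_cast<const shef__ccw_offset_t*>(action_data.data());
    // Stored big-endian; BYTE_ORDER__htonl swaps to host order.
    return std::make_pair(BYTE_ORDER__htonl(record->offset), BYTE_ORDER__htonl(record->size));
}
// The decoded offset is relative to the CCW section; ShefFileHandle adds its
// m_ccws_buffer_offset before seeking, so callers never deal with absolute file offsets.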
@@ -2772,7 +2971,7 @@ Expected<hailo_nms_info_t> HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo
 }

 Expected<LayerInfo> HefUtils::get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features,
     const ProtoHEFHwArch &hef_arch)
 {
     // We parse only boundary layers for user usage
@@ -2826,18 +3025,18 @@ static Expected<ConnectedContextInfo> parse_connected_context_info(
         "Failed to parse HEF. Invalid connected_sys_index: {}.", connected_context_proto.sys_index());
     CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(connected_context_proto.engine_id()), HAILO_INVALID_HEF,
         "Failed to parse HEF. Invalid engine_id: {}. in connected_contexts", connected_context_proto.engine_id());
-    CHECK_AS_EXPECTED(IS_FIT_IN_UINT8(connected_context_proto.index()), HAILO_INVALID_HEF,
+    CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(connected_context_proto.index()), HAILO_INVALID_HEF,
         "Failed to parse HEF. Invalid connected_context_index: {}.", connected_context_proto.index());

     ConnectedContextInfo connected_context{};
-    connected_context.context_index = static_cast<uint8_t>(connected_context_proto.index());
+    connected_context.context_index = static_cast<uint16_t>(connected_context_proto.index());
     connected_context.stream_index = static_cast<uint8_t>(connected_context_proto.sys_index());
     connected_context.dma_engine_index = static_cast<uint8_t>(connected_context_proto.engine_id());
     return connected_context;
 }

 Expected<LayerInfo> HefUtils::get_inter_context_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
 {
     LayerInfo result = {};
     CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "Inter-context layer can't be mux.");
@@ -2893,7 +3092,7 @@
 }

 Expected<LayerInfo> HefUtils::get_ddr_layer_info(const ProtoHEFCoreOpMock &core_op,
-    const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
+    const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features)
 {
     LayerInfo result = {};
     CHECK_AS_EXPECTED(PROTO__EDGE_LAYER_TYPE__INFO == layer.edge_layer_type(), HAILO_INVALID_HEF, "DDR layer can't be mux.");
@@ -3075,7 +3274,7 @@ Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(
         break;
     }
     case ProtoHEFAction::kWriteDataCcw: {
-        auto config_buffer = parse_ccw_buffer(action.write_data_ccw().data());
+        auto config_buffer = parse_ccw_buffer(action.write_data_ccw().data()); // TODO: make this unsupported for sHEF
         CHECK_EXPECTED(config_buffer);
         config_buffers.emplace_back(config_buffer.release());
         break;
     }
@@ -3093,6 +3292,12 @@ Expected<std::vector<WriteMemoryInfo>> Hef::Impl::create_single_context_core_op_config(
         config_buffers.emplace_back(std::move(write_memory_info));
         break;
     }
+    case ProtoHEFAction::kSwitchLcuBatch: {
+        LOGGER__ERROR("Parsing error. Context-switch optimization related actions are not supported over Ethernet. "
+            "If you use the Ethernet interface, please disable context-switch optimizations in the Dataflow Compiler (SDK) and then re-create the HEF. "
+            "See the Dataflow Compiler user guide for more information.");
+        return make_unexpected(HAILO_INVALID_HEF);
+    }
     case ProtoHEFAction::kAllowInputDataflow: {
     case ProtoHEFAction::kWaitForModuleConfigDone:
         // We ignore the 'wait_for_interrupt' actions. After writing the configurations we can be sure everything is configured and don't need to wait for interrupts
@@ -3122,6 +3327,11 @@ ProtoHEFHwArch Hef::Impl::get_device_arch()
     return m_header.hw_arch();
 }

+std::shared_ptr<ShefFileHandle> Hef::Impl::get_shef_file_handle()
+{
+    return m_shef_file_handle;
+}
+
 Expected<float64_t> Hef::Impl::get_bottleneck_fps(const std::string &net_group_name)
 {
     auto core_op = get_core_op_by_net_group_name(net_group_name);
diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp
index b1c40020..7e6d91ab 100644
--- a/hailort/libhailort/src/hef/hef_internal.hpp
+++ b/hailort/libhailort/src/hef/hef_internal.hpp
@@ -31,19 +31,19 @@
 #include "hailo/hef.hpp"
 #include "hailo/network_group.hpp"
 #include "hailo/hailort_defaults.hpp"
-#include "net_flow/ops/op_metadata.hpp"
+#include "net_flow/ops_metadata/op_metadata.hpp"

 #include "hef/core_op_metadata.hpp"
 #include "hef/layer_info.hpp"
 #include "hef/context_switch_actions.hpp"
 #include "net_flow/ops/op.hpp"
-#include "net_flow/pipeline/pipeline_internal.hpp"
 #include "device_common/control_protocol.hpp"
 #include "control_protocol.h"

 #include
 #include
 #include
+#include <fstream>

 extern "C" {
 #include "md5.h"
@@ -53,8 +53,6 @@
 namespace hailort
 {

-#define DEFAULT_NMS_NO_BURST_SIZE (1)
-
 class CoreOpMetadata;
 class CoreOp;
 using ProtoHEFNetworkGroupPtr = std::shared_ptr<ProtoHEFNetworkGroup>;
@@ -116,12 +114,19 @@ struct ProtoHEFCoreOpMock {
 typedef struct {
     uint32_t magic;
     uint32_t version;
-    uint32_t hef_proto_length;
-    uint32_t reserved;
+    uint32_t hef_proto_size;
+    uint32_t ccws_size;
     MD5_SUM_t expected_md5;
 } hef__header_t;
 #pragma pack(pop)

+#pragma pack(push, 1)
+typedef struct {
+    uint32_t offset;
+    uint32_t size;
+} shef__ccw_offset_t;
+#pragma pack(pop)
+
 typedef enum {
     HEF__FORMAT__TF_RGB = 0,
     HEF__FORMAT__FRAMES,
@@ -169,7 +174,9 @@ static const std::vector<ProtoHEFExtensionType> SUPPORTED_EXTENSIONS = {
     HAILO_NET_FLOW_YOLOV5_SEG_NMS,  // Extension added in platform 4.15 release
     HAILO_NET_FLOW_IOU_NMS,         // Extension added in platform 4.15 release
     HW_PADDING,                     // Extension added in platform 4.16 release
-    HAILO_NET_FLOW_YOLOV8_NMS       // Extension added in platform 4.16 release
+    HAILO_NET_FLOW_YOLOV8_NMS,      // Extension added in platform 4.16 release
+    BATCH_REGISTER_CONFIG,          // Extension added in platform 4.17 release
+    HAILO_NET_FLOW_BBOX_DECODING    // Extension added in platform 4.18 release
 };

 static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer)
@@ -223,12 +230,26 @@
 class VdmaConfigCoreOp;
 class VdmaDevice;
 class HailoRTDriver;

+class ShefFileHandle final
+{
+public:
+    ShefFileHandle(const std::string &hef_path, uint32_t ccws_buffer_offset);
+    hailo_status open();
+    Expected<Buffer> read(uint32_t offset, size_t size);
+    hailo_status close();
+
+private:
+    std::string m_hef_path;
+    std::ifstream m_hef_file;
+    uint32_t m_ccws_buffer_offset;
+};
+
 class Hef::Impl final
 {
 public:
     static const uint32_t HEADER_MAGIC = 0x01484546;
-    static const uint32_t HEADER_VERSION = 0;
+    static const uint32_t HEADER_VERSION_0 = 0; // Old HEF
+    static const uint32_t HEADER_VERSION_1 = 1; // New HEF (SHEF)

     static Expected<Hef::Impl> create(const std::string &hef_path);
     static Expected<Hef::Impl> create(const MemoryView &hef_buffer);
@@ -239,6 +260,8 @@ class Hef::Impl final
     Expected<std::pair<std::string, std::string>> get_network_group_and_network_name(const std::string &name);

+    void clear_hef_buffer();
+
     Expected<std::shared_ptr<ProtoHEFCoreOpMock>> get_core_op_by_net_group_name(const std::string &net_group_name="");

     Expected<std::vector<hailo_network_info_t>> get_network_infos(const std::string &net_group_name="");
@@ -261,6 +284,7 @@ class Hef::Impl final
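For orientation, a minimal usage sketch of the ShefFileHandle declared above, following the open/read/close semantics implemented in hef.cpp (ccw_offset and ccw_size are hypothetical values; real callers take them from shef__ccw_offset_t records):

// Illustrative sketch only - error handling reduced to the status/Expected idiom used above.
ShefFileHandle handle(hef_path, ccws_buffer_offset); // offset = sizeof(header) + hef_proto_size
auto status = handle.open();                         // opens the .hef in binary mode
auto ccw = handle.read(ccw_offset, ccw_size);        // Expected<Buffer>; seeks to
                                                     // m_ccws_buffer_offset + ccw_offset first
status = handle.close();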
Expected get_number_of_input_streams(const std::string &net_group_name=""); Expected get_number_of_output_streams(const std::string &net_group_name=""); ProtoHEFHwArch get_device_arch(); + std::shared_ptr get_shef_file_handle(); Expected get_bottleneck_fps(const std::string &net_group_name=""); static bool contains_ddr_layers(const ProtoHEFCoreOpMock &core_op); static hailo_status validate_core_op_unique_layer_names(const ProtoHEFCoreOpMock &core_op); @@ -416,6 +440,7 @@ class Hef::Impl final std::vector m_hef_optional_extensions; std::bitset m_supported_extensions_bitset; MD5_SUM_t m_md5; + std::shared_ptr m_shef_file_handle; #ifdef HAILO_SUPPORT_MULTI_PROCESS Buffer m_hef_buffer; @@ -450,46 +475,46 @@ class HefUtils final static hailo_status fill_boundary_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata, const ProtoHEFHwArch &hef_arch); static Expected get_inter_context_layer_info( - const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, + const ProtoHEFCoreOpMock &core_op, const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_inter_context_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata); static Expected get_ddr_layer_info( - const ProtoHEFCoreOpMock &core_op, const uint8_t context_index, + const ProtoHEFCoreOpMock &core_op, const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static hailo_status fill_ddr_layers_info( const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, ContextMetadata &context_metadata); static hailo_status check_ddr_pairs_match( const std::vector &context_ddr_input_layers, const std::vector &context_ddr_output_layers, - const uint8_t context_index); + const uint16_t context_index); static Expected parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto, - const SupportedFeatures &supported_features); + const SupportedFeatures &supported_features, std::shared_ptr shef_file_handle); static Expected parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, - const ProtoHEFContext &context_proto, uint8_t context_index, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch); + const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features, + const ProtoHEFHwArch &hef_arch, std::shared_ptr shef_file_handle); static Expected> parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, - const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); + const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, std::shared_ptr shef_file_handle); static Expected parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static Expected get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op, - const uint8_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features, + const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures 
&supported_features, const ProtoHEFHwArch &hef_arch); - + static Expected get_partial_network_name_by_index(const ProtoHEFCoreOpMock &core_op, uint8_t network_index, const SupportedFeatures &supported_features); static std::string get_network_group_name(const ProtoHEFNetworkGroup &net_group, const SupportedFeatures &supported_features); @@ -500,12 +525,12 @@ class HefUtils final // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBase &base_info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFNetworkGroupMetadata &network_group_proto, - bool transposed, const uint8_t context_index, const uint8_t network_index, LayerInfo &layer_info, + bool transposed, const uint16_t context_index, const uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); // TODO HRT-12051: Remove is_part_of_mux_layer parameter when core_hw_padding is removed static hailo_status fill_layer_info(const ProtoHEFEdgeLayerInfo &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const bool is_part_of_mux_layer); static hailo_status fill_fused_nms_info(const ProtoHEFEdgeLayerFused &info, @@ -513,12 +538,12 @@ class HefUtils final const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static hailo_status fill_mux_info(const ProtoHEFEdgeLayerMux &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); static hailo_status fill_planes_info(const ProtoHEFEdgeLayerPlanes &info, const ProtoHEFEdgeConnectionType &edge_connection_type, const ProtoHEFCoreOpMock &core_op, - hailo_stream_direction_t direction, const uint8_t context_index, const std::string &partial_network_name, + hailo_stream_direction_t direction, const uint16_t context_index, const std::string &partial_network_name, uint8_t network_index, LayerInfo &layer_info, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch); }; diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp index 385e86ea..5e996be7 100644 --- a/hailort/libhailort/src/hef/layer_info.hpp +++ b/hailort/libhailort/src/hef/layer_info.hpp @@ -14,7 +14,7 @@ #include "hailo/hailort_common.hpp" #include "hailo/hailort_defaults.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "control_protocol.h" #include @@ -45,7 +45,7 @@ struct BufferIndices { }; struct ConnectedContextInfo { - uint8_t context_index; + uint16_t context_index; uint8_t dma_engine_index; uint8_t stream_index; }; @@ -67,7 +67,7 @@ struct LayerInfo { uint8_t network_index; CONTROL_PROTOCOL__nn_stream_config_t nn_stream_config; uint32_t 
max_shmifo_size; - uint8_t context_index; + uint16_t context_index; uint32_t pad_index = INVALID_PAD_INDEX; // Transformation and shape info diff --git a/hailort/libhailort/src/hw_consts.hpp b/hailort/libhailort/src/hw_consts.hpp index c576eed2..59dfe54b 100644 --- a/hailort/libhailort/src/hw_consts.hpp +++ b/hailort/libhailort/src/hw_consts.hpp @@ -19,12 +19,6 @@ // Max periph bytes per buffer for hailo1x because (we use its value shifted right by 3 - according to the spec) to // configure shmifo credit size - which in hailo15 only has a width of 10 bits #define HAILO1X_PERIPH_BYTES_PER_BUFFER_MAX_SIZE (0x00002000L) -#define HAILO1X_PERIPH_PAYLOAD_MAX_VALUE (0x007FFFFFL) - - -/** Vdma Channel registers ***************************************************/ -#define VDMA_CHANNEL_CONTROL_OFFSET (0x00) -#define VDMA_CHANNEL_NUM_AVAIL_OFFSET (0x02) - +#define HAILO1X_PERIPH_PAYLOAD_MAX_VALUE (0x01FFFFFFL) #endif /* _HAILO_HW_CONSTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/CMakeLists.txt b/hailort/libhailort/src/net_flow/CMakeLists.txt index c49a12b3..8c9f7a65 100644 --- a/hailort/libhailort/src/net_flow/CMakeLists.txt +++ b/hailort/libhailort/src/net_flow/CMakeLists.txt @@ -3,6 +3,7 @@ cmake_minimum_required(VERSION 3.0.0) set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/nms_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov5_post_process.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov5_bbox_only_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolox_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/ssd_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ops/argmax_post_process.cpp @@ -11,12 +12,19 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/ops/yolov8_post_process.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_builder.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/pipeline_internal.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/filter_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/queue_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/edge_elements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/multi_io_elements.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/async_pipeline_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/async_infer_runner.cpp ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/infer_model.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream_builder.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/vstream.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pipeline/inference_pipeline.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp index 3d753213..d5cdaf91 100644 --- a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp @@ -131,6 +131,38 @@ ArgmaxFunction ArgmaxPostProcessOp::m_argmax_function_array[ARGMAX_NUM_OF_POSSIB ArgmaxPostProcessOp::execute_not_supported, ArgmaxPostProcessOp::execute_not_supported } + }, + { + { + // F8CR x AUTO + // We don't support input_format_type to be auto + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported, + ArgmaxPostProcessOp::execute_not_supported + }, + { + // F8CR x UINT8 + ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto + 
ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+        },
+        {
+            // F8CR x UINT16
+            ArgmaxPostProcessOp::execute_not_supported, // We don't support output_format_type to be auto
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+            ArgmaxPostProcessOp::F8CR_to_NHW_feature_axis,
+        },
+        {
+            // F8CR x FLOAT32
+            // We don't support input_format_type to be float32
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported,
+            ArgmaxPostProcessOp::execute_not_supported
+        }
     }
 };
@@ -155,6 +187,9 @@ hailo_status ArgmaxPostProcessOp::execute(const std::map
@@ -27,39 +27,13 @@ namespace hailort
 namespace net_flow
 {

-#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (3)
+#define ARGMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (4)
 #define ARGMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4)
-
-constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1};
-constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1};
-constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1};
+#define F8CR_FEATURES_IN_CHUNK (8)

 typedef hailo_status (*ArgmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
     const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
-
-class ArgmaxOpMetadata : public OpMetadata
-{
-public:
-    static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata,
-        const std::unordered_map<std::string, BufferMetaData> &outputs_metadata,
-        const std::string &network_name);
-    std::string get_op_description() override;
-    hailo_status validate_format_info() override;
-    static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format);
-
-    virtual Expected<hailo_vstream_info_t> get_output_vstream_info() override;
-
-private:
-    ArgmaxOpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata,
-        const std::unordered_map<std::string, BufferMetaData> &outputs_metadata,
-        const std::string &network_name)
-        : OpMetadata(inputs_metadata, outputs_metadata, "Argmax-Post-Process", network_name, OperationType::ARGMAX)
-    {}
-
-    hailo_status validate_params() override;
-};
-
 class ArgmaxPostProcessOp : public Op
 {
@@ -148,6 +122,41 @@ class ArgmaxPostProcessOp : public Op
         return HAILO_SUCCESS;
     }

+    template <typename SrcType, typename DstType>
+    static hailo_status F8CR_to_NHW_feature_axis(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
+        const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs)
+    {
+        auto src_ptr = (SrcType*)inputs.begin()->second.data();
+        auto dst_ptr = (DstType*)outputs.begin()->second.data();
+        const auto src_row_size = input_metadata.padded_shape.width * input_metadata.padded_shape.features;
+        const auto dst_row_size = output_metadata.shape.width;
+        const auto num_of_eight_channels_chunks = input_metadata.padded_shape.features / F8CR_FEATURES_IN_CHUNK;
+        const auto eight_channels_x_width_size = input_metadata.padded_shape.width * F8CR_FEATURES_IN_CHUNK;
+
+        for (uint32_t r = 0; r < input_metadata.shape.height; r++) {
+            const SrcType *src_row = src_ptr + (r * src_row_size);
+            DstType *dst_row = dst_ptr + (r * dst_row_size);
+            for (uint32_t w = 0; w < input_metadata.shape.width; w++) {
+                const SrcType *offset_in_row = src_row + (w * F8CR_FEATURES_IN_CHUNK);
+                DstType max_index = 0;
+                auto max_value = *offset_in_row;
+                for (uint32_t channel_chunk_id = 0; channel_chunk_id < num_of_eight_channels_chunks; channel_chunk_id++) {
+                    const SrcType *offset_in_column = offset_in_row + (eight_channels_x_width_size * channel_chunk_id);
+                    uint32_t num_of_channels_in_chunk = ((channel_chunk_id + 1 == num_of_eight_channels_chunks) ?
+                        (input_metadata.shape.features - (channel_chunk_id * F8CR_FEATURES_IN_CHUNK)) : F8CR_FEATURES_IN_CHUNK);
+                    for (uint32_t c = 0; c < num_of_channels_in_chunk; c++) {
+                        const auto &current_value = *(offset_in_column + c);
+                        if (current_value > max_value) {
+                            max_index = static_cast<DstType>(c + F8CR_FEATURES_IN_CHUNK * channel_chunk_id);
+                            max_value = current_value;
+                        }
+                    }
+                }
+                dst_row[w] = max_index;
+            }
+        }
+        return HAILO_SUCCESS;
+    }
+
     static hailo_status execute_not_supported(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata,
         const std::map<std::string, MemoryView> &inputs, std::map<std::string, MemoryView> &outputs);
diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
index 38bd2713..735f2b20 100644
--- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
+++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp
@@ -10,7 +10,6 @@
 **/

 #include "net_flow/ops/nms_post_process.hpp"
-#include "hef/hef_internal.hpp"

 namespace hailort
 {
@@ -193,7 +192,7 @@ hailo_status NmsPostProcessOp::hailo_nms_format(MemoryView dst_view)
     return HAILO_SUCCESS;
 }

-hailo_format_t NmsOpMetadata::expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type)
+hailo_format_t NmsOpMetadata::expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type, bool bbox_only)
 {
     auto format = output_format;
@@ -201,6 +200,8 @@
     {
         if (OperationType::YOLOV5SEG == type) {
             format.order = HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK;
+        } else if (bbox_only) {
+            format.order = HAILO_FORMAT_ORDER_NHWC;
         } else {
             format.order = HAILO_FORMAT_ORDER_HAILO_NMS;
         }
@@ -256,4 +257,4 @@ hailo_nms_info_t NmsOpMetadata::nms_info()
 }

 }
-} \ No newline at end of file
+}
diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
index ae2623b2..e37b2a9c 100644
--- a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
+++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp
@@ -3,8 +3,8 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file op.hpp
- * @brief Net-Flow op
+ * @file nms_post_process.hpp
+ * @brief NMS op
 *
 * https://learnopencv.com/object-detection-using-yolov5-and-opencv-dnn-in-c-and-python :
 * The headline '4.3.5 POST-PROCESSING YOLOv5 Prediction Output' contains explanations on the YOLOv5 post-process.
@@ -22,6 +22,7 @@
 #include "common/logger_macros.hpp"

 #include "net_flow/ops/op.hpp"
+#include "net_flow/ops_metadata/nms_op_metadata.hpp"

 namespace hailort
@@ -95,70 +96,6 @@ inline bool operator==(const DetectionBbox &first, const DetectionBbox &second)
     return first.m_class_id == second.m_class_id && first.m_bbox == second.m_bbox;
 }

-struct NmsPostProcessConfig
-{
-    // User given confidence threshold for a bbox. A bbox will be consider as detection if the
-    // (objectness * class_score) is higher then the confidence_threshold.
-    double nms_score_th = 0;
-
-    // User given IoU threshold (intersection over union). This threshold is for performing
-    // Non-maximum suppression (Removing overlapping boxes).
-    double nms_iou_th = 0;
-
-    // Maximum amount of bboxes per nms class.
-    uint32_t max_proposals_per_class = 0;
-
-    // The model's number of classes. (This depends on the dataset that the model trained on). 
- uint32_t number_of_classes = 0; - - // Toggle background class removal from results - bool background_removal = false; - - // Index of background class for background removal - uint32_t background_removal_index = 0; - - // Indicates whether or not NMS performs IoU over different classes for the same box. - // If set to false - NMS won't intersect different classes, and a box could have multiple labels. - bool cross_classes = false; -}; - -static const float32_t REMOVED_CLASS_SCORE = 0.0f; - -class NmsOpMetadata : public OpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const std::string &network_name, - const OperationType type, - const std::string &name); - virtual ~NmsOpMetadata() = default; - std::string get_nms_config_description(); - hailo_status validate_format_info() override; - NmsPostProcessConfig &nms_config() { return m_nms_config;}; - hailo_nms_info_t nms_info(); - std::string get_op_description() override; - static hailo_format_t expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type); - - virtual Expected get_output_vstream_info() override; - -protected: - NmsOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const std::string &name, - const std::string &network_name, - const OperationType type) - : OpMetadata(inputs_metadata, outputs_metadata, name, network_name, type), - m_nms_config(nms_post_process_config) - {} - - hailo_status validate_params() override; - -private: - NmsPostProcessConfig m_nms_config; -}; class NmsPostProcessOp : public Op { diff --git a/hailort/libhailort/src/net_flow/ops/op.hpp b/hailort/libhailort/src/net_flow/ops/op.hpp index 2d94e7a7..0b958088 100644 --- a/hailort/libhailort/src/net_flow/ops/op.hpp +++ b/hailort/libhailort/src/net_flow/ops/op.hpp @@ -16,7 +16,7 @@ #include "hailo/hailort.h" #include "hailo/buffer.hpp" #include "hailo/network_group.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" #include "common/utils.hpp" #include "common/logger_macros.hpp" @@ -73,4 +73,4 @@ class Op } } -#endif // _HAILO_NET_FLOW_OP_HPP_ \ No newline at end of file +#endif // _HAILO_NET_FLOW_OP_HPP_ diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp index 8e4e3411..a94a54ec 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp @@ -15,13 +15,11 @@ #include "hailo/hailort.h" #include "net_flow/ops/op.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/softmax_op_metadata.hpp" #include "common/utils.hpp" #include "hailo/quantization.hpp" -#include - namespace hailort { namespace net_flow @@ -30,34 +28,9 @@ namespace net_flow #define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_ORDERS (2) // NHWC, NC #define SOFTMAX_NUM_OF_POSSIBLE_FORMAT_TYPES (4) // Auto, UINT8, UINT16, FLOAT32 -constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1}; -constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1}; - typedef hailo_status (*SoftmaxFunction)(const BufferMetaData &input_metadata, const BufferMetaData &output_metadata, const std::map &inputs, std::map &outputs); -class SoftmaxOpMetadata : public OpMetadata -{ -public: - static Expected> create(const std::unordered_map 
&inputs_metadata, - const std::unordered_map &outputs_metadata, - const std::string &network_name); - std::string get_op_description() override; - hailo_status validate_format_info() override; - static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); - - virtual Expected get_output_vstream_info() override; - -private: - SoftmaxOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const std::string &network_name) - : OpMetadata(inputs_metadata, outputs_metadata, "Softmax-Post-Process", network_name, OperationType::SOFTMAX) - {} - - hailo_status validate_params() override; -}; - class SoftmaxPostProcessOp : public Op { diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp index 1d1f1309..82bb735f 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp @@ -39,6 +39,8 @@ hailo_status SSDOpMetadata::validate_params() return status; } + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "SSDPostProcessOp: bbox_only is not supported for SSD model"); + // Validate each anchor is mapped by reg and cls inputs for (const auto ®_to_cls_name : m_ssd_config.reg_to_cls_inputs) { CHECK(m_ssd_config.anchors.count(reg_to_cls_name.first), HAILO_INVALID_ARGUMENT, @@ -90,9 +92,9 @@ hailo_status SSDOpMetadata::validate_format_info() std::string SSDOpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}, Centers scales factor: {}, " + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}, Centers scales factor: {}, " "Bbox dimension scale factor: {}, Normalize boxes: {}", OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, - m_ssd_config.image_height, m_ssd_config.image_width, m_ssd_config.centers_scale_factor, m_ssd_config.bbox_dimensions_scale_factor, + static_cast(m_ssd_config.image_height), static_cast(m_ssd_config.image_width), m_ssd_config.centers_scale_factor, m_ssd_config.bbox_dimensions_scale_factor, m_ssd_config.normalize_boxes); return config_info; } @@ -233,4 +235,4 @@ hailo_status SSDPostProcessOp::extract_detections(const std::string ®_input_n } } -} \ No newline at end of file +} diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp index ba1331e2..25d6077e 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp @@ -13,69 +13,13 @@ #define _HAILO_SSD_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" namespace hailort { namespace net_flow { -struct SSDPostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - uint32_t centers_scale_factor = 0; - - uint32_t bbox_dimensions_scale_factor = 0; - - uint32_t ty_index = 0; - uint32_t tx_index = 0; - uint32_t th_index = 0; - uint32_t tw_index = 0; - - std::map reg_to_cls_inputs; - - // A vector of anchors, each element in the vector represents the anchors for a specific layer - // Each layer anchors vector is structured as {w,h} pairs. - // Each anchor is mapped by 2 keys: - // 1. 
reg input - // 2. cls input - std::map> anchors; - - // Indicates whether boxes should be normalized (and clipped) - bool normalize_boxes = false; -}; - -class SSDOpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const SSDPostProcessConfig &ssd_post_process_config, - const std::string &network_name); - std::string get_op_description() override; - hailo_status validate_format_info() override; - SSDPostProcessConfig &ssd_config() { return m_ssd_config;}; - -private: - SSDPostProcessConfig m_ssd_config; - SSDOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const SSDPostProcessConfig &ssd_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "SSD-Post-Process", network_name, OperationType::SSD) - , m_ssd_config(ssd_post_process_config) - {} - - hailo_status validate_params() override; -}; - class SSDPostProcessOp : public NmsPostProcessOp { diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp new file mode 100644 index 00000000..d03dc185 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp @@ -0,0 +1,130 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_post_process.cpp + * @brief YOLOv5 bbox only post process + * + **/ + +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" + +namespace hailort +{ +namespace net_flow +{ + +Expected> YOLOv5BboxOnlyPostProcessOp::create(std::shared_ptr metadata) +{ + auto status = metadata->validate_format_info(); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto op = std::shared_ptr(new (std::nothrow) YOLOv5BboxOnlyPostProcessOp(metadata)); + CHECK_AS_EXPECTED(op != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::shared_ptr(std::move(op)); +} + +Expected Yolov5BboxOnlyOpMetadata::get_output_vstream_info() +{ + auto vstream_info = NmsOpMetadata::get_output_vstream_info(); + CHECK_EXPECTED(vstream_info); + + vstream_info->shape = m_outputs_metadata.begin()->second.shape; + return vstream_info.release(); +} + +hailo_status Yolov5BboxOnlyOpMetadata::validate_format_info() +{ + for (const auto& output_metadata : m_outputs_metadata) { + + CHECK(HAILO_FORMAT_TYPE_FLOAT32 == output_metadata.second.format.type, HAILO_INVALID_ARGUMENT, "The given output format type {} is not supported, " + "should be HAILO_FORMAT_TYPE_FLOAT32", HailoRTCommon::get_format_type_str(output_metadata.second.format.type)); + + CHECK(HAILO_FORMAT_ORDER_NHWC == output_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given output format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_NHWC", HailoRTCommon::get_format_order_str(output_metadata.second.format.order)); + + CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.", + output_metadata.first); + CHECK(!(HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as argmax, which is not supported for this model.", + 
output_metadata.first); + } + + assert(1 <= m_inputs_metadata.size()); + const hailo_format_type_t& first_input_type = m_inputs_metadata.begin()->second.format.type; + for (const auto& input_metadata : m_inputs_metadata) { + CHECK(HAILO_FORMAT_ORDER_NHCW == input_metadata.second.format.order, HAILO_INVALID_ARGUMENT, "The given input format order {} is not supported, " + "should be HAILO_FORMAT_ORDER_NHCW", HailoRTCommon::get_format_order_str(input_metadata.second.format.order)); + + CHECK((HAILO_FORMAT_TYPE_UINT8 == input_metadata.second.format.type) || + (HAILO_FORMAT_TYPE_UINT16 == input_metadata.second.format.type), + HAILO_INVALID_ARGUMENT, "The given input format type {} is not supported, should be HAILO_FORMAT_TYPE_UINT8 or HAILO_FORMAT_TYPE_UINT16", + HailoRTCommon::get_format_type_str(input_metadata.second.format.type)); + + CHECK(input_metadata.second.format.type == first_input_type, HAILO_INVALID_ARGUMENT, "All input format types should be the same"); + } + + return HAILO_SUCCESS; +} + +std::string Yolov5BboxOnlyOpMetadata::get_op_description() +{ + auto nms_config_info = fmt::format("Classes: {}", + nms_config().number_of_classes); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov5_config.image_height), static_cast(m_yolov5_config.image_width)); + return config_info; +} + + +Expected> Yolov5BboxOnlyOpMetadata::create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_post_process_config, + const std::string &network_name) +{ + auto op_metadata = std::shared_ptr(new (std::nothrow) Yolov5BboxOnlyOpMetadata(inputs_metadata, outputs_metadata, + nms_post_process_config, yolov5_post_process_config, network_name)); + CHECK_AS_EXPECTED(op_metadata != nullptr, HAILO_OUT_OF_HOST_MEMORY); + + auto status = op_metadata->validate_params(); + CHECK_SUCCESS_AS_EXPECTED(status); + + return std::shared_ptr(std::move(op_metadata)); +} + +hailo_status YOLOv5BboxOnlyPostProcessOp::execute(const std::map &inputs, std::map &outputs) +{ + const auto &inputs_metadata = m_metadata->inputs_metadata(); + const auto &yolo_config = m_metadata->yolov5_config(); + CHECK(inputs.size() == yolo_config.anchors.size(), HAILO_INVALID_ARGUMENT, + "Anchors vector count must be equal to data vector count. 
Anchors size is {}, data size is {}", + yolo_config.anchors.size(), inputs.size()); + + auto dst_ptr = (float32_t*)outputs.begin()->second.data(); + + size_t next_bbox_output_offset = YOLOV5_BBOX_ONLY_BBOXES_INDEX; + + for (const auto &name_to_input : inputs) { + hailo_status status = HAILO_UNINITIALIZED; + auto &name = name_to_input.first; + assert(contains(inputs_metadata, name)); + auto &input_metadata = inputs_metadata.at(name); + assert(contains(yolo_config.anchors, name)); + if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT8) { + status = add_bboxes(dst_ptr, next_bbox_output_offset, name_to_input.second, + input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, yolo_config.anchors.at(name)); + } else if (input_metadata.format.type == HAILO_FORMAT_TYPE_UINT16) { + status = add_bboxes(dst_ptr, next_bbox_output_offset, name_to_input.second, + input_metadata.quant_info, input_metadata.shape, input_metadata.padded_shape, yolo_config.anchors.at(name)); + } else { + CHECK_SUCCESS(HAILO_INVALID_ARGUMENT, "YOLO post-process received invalid input type {}", input_metadata.format.type); + } + CHECK_SUCCESS(status); + } + return HAILO_SUCCESS; +} + +} // namespace net_flow +} // namespace hailort \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp new file mode 100644 index 00000000..282ff34a --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp @@ -0,0 +1,101 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_post_process.hpp + * @brief YOLOV5 bbox only post process + * Output format of yolov5_bbox_only is NHWC - [1, total_proposals, 5 + number_of_classes] + * The bboxes entry in the output of yolov5_bbox_only is a list of bboxes, such that each of them looks like this: + * (y_min, x_min, y_max, x_max, objectness, score_per_class) + * + **/ + +#ifndef _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ +#define _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ + +#include "net_flow/ops/yolov5_post_process.hpp" +#include "net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp" + +namespace hailort +{ + +static const uint32_t YOLOV5_BBOX_NUM_OF_VALUES = 5; +namespace net_flow +{ + +class YOLOv5BboxOnlyPostProcessOp : public YOLOv5PostProcessOp +{ +public: + static Expected> create(std::shared_ptr metadata); + + hailo_status execute(const std::map &inputs, std::map &outputs) override; + +private: + + YOLOv5BboxOnlyPostProcessOp(std::shared_ptr metadata) : + YOLOv5PostProcessOp(static_cast>(metadata)) + {} + + static const uint32_t YOLOV5_BBOX_ONLY_BBOXES_INDEX = 0; + + template + void add_classes_scores(hailo_quant_info_t &quant_info, DstType* dst_data, size_t &next_bbox_output_offset, + SrcType* src_data, uint32_t entry_idx, uint32_t class_start_idx, uint32_t padded_width) + { + const auto &nms_config = m_metadata->nms_config(); + + for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { + auto class_entry_idx = entry_idx + ((class_start_idx + class_index) * padded_width); + auto class_confidence = dequantize_and_sigmoid( + src_data[class_entry_idx], quant_info); + dst_data[next_bbox_output_offset++] = class_confidence; + } + } + + template + hailo_status add_bboxes(DstType *dst_ptr, size_t &next_bbox_output_offset, + const MemoryView &input_buffer, 
hailo_quant_info_t quant_info, hailo_3d_image_shape_t shape, + hailo_3d_image_shape_t padded_shape, const std::vector &layer_anchors) + { + const uint32_t X_OFFSET = X_INDEX * padded_shape.width; + const uint32_t Y_OFFSET = Y_INDEX * padded_shape.width; + const uint32_t W_OFFSET = W_INDEX * padded_shape.width; + const uint32_t H_OFFSET = H_INDEX * padded_shape.width; + const uint32_t OBJECTNESS_OFFSET = OBJECTNESS_INDEX * padded_shape.width; + + auto num_of_anchors = get_num_of_anchors(layer_anchors); + + uint32_t entry_size = get_entry_size(); + auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors; + + auto buffer_size = number_of_entries * entry_size * sizeof(SrcType); + CHECK(buffer_size == input_buffer.size(), HAILO_INVALID_ARGUMENT, + "Failed to extract proposals, buffer_size should be {}, but is {}", buffer_size, input_buffer.size()); + + auto input_row_size = padded_shape.width * padded_shape.features; + SrcType *input_data = (SrcType*)input_buffer.data(); + for (uint32_t row = 0; row < shape.height; row++) { + for (uint32_t col = 0; col < shape.width; col++) { + for (uint32_t anchor = 0; anchor < num_of_anchors; anchor++) { + auto entry_idx = (input_row_size * row) + col + ((anchor * entry_size) * padded_shape.width); + auto objectness = dequantize_and_sigmoid(input_data[entry_idx + OBJECTNESS_OFFSET], quant_info); + auto bbox = decode_bbox(input_data, entry_idx, X_OFFSET, Y_OFFSET, W_OFFSET, H_OFFSET, + quant_info, anchor, layer_anchors, col, row, shape); + memcpy(&dst_ptr[next_bbox_output_offset], &bbox, sizeof(hailo_bbox_float32_t) - sizeof(DstType)); // copy y_min, x_min, y_max, x_max + next_bbox_output_offset += (sizeof(hailo_bbox_float32_t) / sizeof(float32_t)) - 1; + dst_ptr[next_bbox_output_offset++] = objectness; + + add_classes_scores(quant_info, dst_ptr, next_bbox_output_offset, input_data, entry_idx, + CLASSES_START_INDEX, padded_shape.width); + } + } + } + return HAILO_SUCCESS; + } +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV5_BBOX_ONLY_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp index 0e08b419..70151993 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp @@ -43,11 +43,16 @@ hailo_status Yolov5OpMetadata::validate_format_info() return NmsOpMetadata::validate_format_info(); } +Expected Yolov5OpMetadata::get_output_vstream_info() +{ + return NmsOpMetadata::get_output_vstream_info(); +} + std::string Yolov5OpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolov5_config.image_height, m_yolov5_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov5_config.image_height), static_cast(m_yolov5_config.image_width)); return config_info; } @@ -72,7 +77,7 @@ hailo_status YOLOv5PostProcessOp::execute(const std::mapnms_config().number_of_classes); } +size_t YOLOv5PostProcessOp::get_num_of_anchors(const std::vector &layer_anchors) +{ + // Each layer anchors vector is structured as {w,h} pairs. 
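add_bboxes() above emits one fixed-size row per (cell, anchor): four box coordinates, objectness, then number_of_classes scores, which is exactly the NHWC [1, total_proposals, 5 + number_of_classes] layout documented in the new header. A small reader sketch for consuming that output buffer (names are illustrative, not part of the API):

    #include <cstddef>

    // One proposal row: y_min, x_min, y_max, x_max, objectness, per-class scores.
    struct ProposalViewSketch
    {
        const float *row;
        float objectness() const { return row[4]; }
        float class_score(size_t class_idx) const { return row[5 + class_idx]; }
    };

    static ProposalViewSketch get_proposal(const float *output, size_t proposal_idx,
                                           size_t number_of_classes)
    {
        const size_t entry_len = 5 + number_of_classes;
        return ProposalViewSketch{output + proposal_idx * entry_len};
    }

total_proposals here is the sum over input layers of height * width * num_of_anchors, matching the loop structure of add_bboxes().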
+ // For example, if we have a vector of size 6 (default YOLOv5 vector) then we have 3 anchors for this layer. + assert(layer_anchors.size() % 2 == 0); + size_t num_of_anchors = (layer_anchors.size() / 2); + return num_of_anchors; +} + } // namespace net_flow } // namespace hailort diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp index 903a1da1..15c9b6b7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp @@ -14,7 +14,7 @@ #define _HAILO_YOLO_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" namespace hailort { @@ -29,6 +29,7 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp static Expected> create(std::shared_ptr metadata); hailo_status execute(const std::map &inputs, std::map &outputs) override; + static size_t get_num_of_anchors(const std::vector &layer_anchors); protected: hailo_bbox_float32_t decode(float32_t tx, float32_t ty, float32_t tw, float32_t th, @@ -48,7 +49,18 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp static const uint32_t OBJECTNESS_INDEX = 4; static const uint32_t CLASSES_START_INDEX = 5; - + template + hailo_bbox_float32_t decode_bbox(SrcType* data, uint32_t entry_idx, const uint32_t X_OFFSET, const uint32_t Y_OFFSET, + const uint32_t W_OFFSET, const uint32_t H_OFFSET, hailo_quant_info_t quant_info, uint32_t anchor, + const std::vector &layer_anchors, uint32_t col, uint32_t row, hailo_3d_image_shape_t shape) + { + auto tx = dequantize_and_sigmoid(data[entry_idx + X_OFFSET], quant_info); + auto ty = dequantize_and_sigmoid(data[entry_idx + Y_OFFSET], quant_info); + auto tw = dequantize_and_sigmoid(data[entry_idx + W_OFFSET], quant_info); + auto th = dequantize_and_sigmoid(data[entry_idx + H_OFFSET], quant_info); + return decode(tx, ty, tw, th, layer_anchors[anchor * 2], layer_anchors[anchor * 2 + 1], col, row, + shape.width, shape.height); + } template void check_threshold_and_add_detection(hailo_bbox_float32_t bbox, hailo_quant_info_t &quant_info, @@ -126,10 +138,7 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp const auto &nms_config = m_metadata->nms_config(); - // Each layer anchors vector is structured as {w,h} pairs. - // For example, if we have a vector of size 6 (default YOLOv5 vector) then we have 3 anchors for this layer. 
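The new decode_bbox() helper funnels the four dequantized-and-sigmoided regression values into decode(), whose body lies outside this hunk. Assuming it follows the stock YOLOv5 (v4 and later) formulation, the arithmetic is roughly the sketch below; the anchor handling and normalization are assumptions, not confirmed by this diff:

    #include <cstdint>

    struct BboxSketch { float y_min, x_min, y_max, x_max; };

    // tx..th are already post-sigmoid; anchors are {w,h} pairs; result is normalized.
    static BboxSketch yolov5_decode_sketch(float tx, float ty, float tw, float th,
                                           float anchor_w, float anchor_h,
                                           uint32_t col, uint32_t row,
                                           uint32_t grid_w, uint32_t grid_h,
                                           float image_w, float image_h)
    {
        const float x_center = (tx * 2.0f - 0.5f + (float)col) / (float)grid_w;
        const float y_center = (ty * 2.0f - 0.5f + (float)row) / (float)grid_h;
        const float w = (tw * 2.0f) * (tw * 2.0f) * anchor_w / image_w;
        const float h = (th * 2.0f) * (th * 2.0f) * anchor_h / image_h;
        return BboxSketch{y_center - h / 2.0f, x_center - w / 2.0f,
                          y_center + h / 2.0f, x_center + w / 2.0f};
    }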
- assert(layer_anchors.size() % 2 == 0); - const size_t num_of_anchors = (layer_anchors.size() / 2); + auto num_of_anchors = get_num_of_anchors(layer_anchors); uint32_t entry_size = get_entry_size(); auto number_of_entries = padded_shape.height * padded_shape.width * num_of_anchors; @@ -149,12 +158,8 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp continue; } - auto tx = dequantize_and_sigmoid(data[entry_idx + X_OFFSET], quant_info); - auto ty = dequantize_and_sigmoid(data[entry_idx + Y_OFFSET], quant_info); - auto tw = dequantize_and_sigmoid(data[entry_idx + W_OFFSET], quant_info); - auto th = dequantize_and_sigmoid(data[entry_idx + H_OFFSET], quant_info); - auto bbox = decode(tx, ty, tw, th, layer_anchors[anchor * 2], layer_anchors[anchor * 2 + 1], col, row, - shape.width, shape.height); + auto bbox = decode_bbox(data, entry_idx, X_OFFSET, Y_OFFSET, W_OFFSET, H_OFFSET, + quant_info, anchor, layer_anchors, col, row, shape); decode_classes_scores(bbox, quant_info, data, entry_idx, CLASSES_START_INDEX, objectness, padded_shape.width); @@ -164,12 +169,12 @@ class YOLOv5PostProcessOp : public NmsPostProcessOp return HAILO_SUCCESS; } -private: - std::shared_ptr m_metadata; + + std::shared_ptr m_metadata; }; -} // namespace net_flow -} // namespace hailort +} /* namespace net_flow */ +} /* namespace hailort */ -#endif // _HAILO_YOLO_POST_PROCESS_HPP_ +#endif /* _HAILO_YOLO_POST_PROCESS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp index 539a9856..e89082b7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp @@ -16,9 +16,11 @@ #else #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wconversion" +#pragma GCC diagnostic ignored "-Wunused-parameter" #endif #define STB_IMAGE_RESIZE_IMPLEMENTATION #include "stb_image_resize.h" +#include #if defined(_MSC_VER) #pragma warning(pop) #else @@ -30,6 +32,9 @@ namespace hailort namespace net_flow { +constexpr uint32_t VECTOR_DIM = 1; +using Eigen_Vector32f = Eigen::Matrix; + Expected> Yolov5SegOpMetadata::create(const std::unordered_map &inputs_metadata, const std::unordered_map &outputs_metadata, const NmsPostProcessConfig &nms_post_process_config, const YoloPostProcessConfig &yolo_config, const YoloV5SegPostProcessConfig &yolo_seg_config, @@ -45,6 +50,13 @@ Expected> Yolov5SegOpMetadata::create(const std::uno return std::shared_ptr(std::move(op_metadata)); } +hailo_status Yolov5SegOpMetadata::validate_params() +{ + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOv5SegPostProcessOp: bbox_only is not supported for YOLOv5Seg model"); + + return Yolov5OpMetadata::validate_params(); +} + hailo_status Yolov5SegOpMetadata::validate_format_info() { for (const auto& output_metadata : m_outputs_metadata) { @@ -90,7 +102,7 @@ Expected Yolov5SegOpMetadata::get_output_vstream_info() auto vstream_info = NmsOpMetadata::get_output_vstream_info(); CHECK_EXPECTED(vstream_info); - vstream_info->nms_shape.max_mask_size = static_cast(yolov5_config().image_height * yolov5_config().image_width); + vstream_info->nms_shape.max_accumulated_mask_size = m_yolo_seg_config.max_accumulated_mask_size; return vstream_info.release(); } @@ -105,8 +117,6 @@ Expected> Yolov5SegPostProcess::create(std::shared_ptr> Yolov5SegPostProcess::create(std::shared_ptr(new (std::nothrow) Yolov5SegPostProcess(std::move(metadata), - 
mask_mult_result_buffer.release(), resized_buffer.release(), transformed_proto_buffer.release(), dequantized_proto_buffer.release())); + mask_mult_result_buffer.release(), resized_buffer.release(), transformed_proto_buffer.release())); CHECK_NOT_NULL_AS_EXPECTED(op, HAILO_OUT_OF_HOST_MEMORY); return std::shared_ptr(std::move(op)); } Yolov5SegPostProcess::Yolov5SegPostProcess(std::shared_ptr metadata, - Buffer &&mask_mult_result_buffer, Buffer &&resized_mask, Buffer &&transformed_proto_buffer, Buffer &&dequantized_proto_buffer) + Buffer &&mask_mult_result_buffer, Buffer &&resized_mask, Buffer &&transformed_proto_buffer) : YOLOv5PostProcessOp(static_cast>(metadata)), m_metadata(metadata), m_mask_mult_result_buffer(std::move(mask_mult_result_buffer)), m_resized_mask_to_image_dim(std::move(resized_mask)), - m_transformed_proto_buffer(std::move(transformed_proto_buffer)), - m_dequantized_proto_buffer(std::move(dequantized_proto_buffer)) + m_transformed_proto_buffer(std::move(transformed_proto_buffer)) {} hailo_status Yolov5SegPostProcess::execute(const std::map &inputs, std::map &outputs) @@ -138,7 +147,7 @@ hailo_status Yolov5SegPostProcess::execute(const std::map> proto_layer( + (float32_t*)m_transformed_proto_buffer.data(), MASK_COEFFICIENT_SIZE, proto_mat_cols); + + Eigen_Vector32f coefficients(detection.m_coefficients.data()); + auto mult_result = (coefficients.transpose() * proto_layer); + + Eigen::Map> result( + (float32_t*)m_mask_mult_result_buffer.data(), VECTOR_DIM, proto_mat_cols); + result = 1.0f / (1.0f + (-1*mult_result).array().exp()); } hailo_status Yolov5SegPostProcess::crop_and_copy_mask(const DetectionBbox &detection, MemoryView &buffer, uint32_t buffer_offset) @@ -210,10 +219,10 @@ hailo_status Yolov5SegPostProcess::crop_and_copy_mask(const DetectionBbox &detec static_cast(yolov5_config.image_height), 0, 1, STBIR_ALPHA_CHANNEL_NONE, 0, STBIR_EDGE_CLAMP, STBIR_FILTER_TRIANGLE, STBIR_COLORSPACE_LINEAR, NULL); - auto x_min = static_cast(std::ceil(detection.m_bbox.x_min * yolov5_config.image_width)); - auto x_max = static_cast(std::ceil(detection.m_bbox.x_max * yolov5_config.image_width)); - auto y_min = static_cast(std::ceil(detection.m_bbox.y_min * yolov5_config.image_height)); - auto y_max = static_cast(std::ceil(detection.m_bbox.y_max * yolov5_config.image_height)); + auto x_min = static_cast(MAX(std::ceil(detection.m_bbox.x_min * yolov5_config.image_width), 0.0f)); + auto x_max = static_cast(MIN(std::ceil(detection.m_bbox.x_max * yolov5_config.image_width), yolov5_config.image_width)); + auto y_min = static_cast(MAX(std::ceil(detection.m_bbox.y_min * yolov5_config.image_height), 0.0f)); + auto y_max = static_cast(MIN(std::ceil(detection.m_bbox.y_max * yolov5_config.image_height), yolov5_config.image_height)); auto box_width = detection.get_bbox_width(yolov5_config.image_width); uint8_t *dst_mask = (uint8_t*)(buffer.data() + buffer_offset); @@ -245,22 +254,26 @@ hailo_status Yolov5SegPostProcess::calc_and_copy_mask(const DetectionBbox &detec Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryView &buffer, DetectionBbox &detection, uint32_t buffer_offset) { - uint32_t copied_bytes_amount = 0; + uint32_t detection_size = sizeof(detection.m_bbox_with_mask); + uint32_t mask_size = static_cast(detection.m_bbox_with_mask.mask_size); + CHECK((buffer_offset + detection_size + mask_size) < buffer.size(), HAILO_INSUFFICIENT_BUFFER, + "The given buffer is too small to contain all detections." 
\ + " The output buffer will contain the highest scored detections that could be filled." \ + " One can use `set_nms_max_accumulated_mask_size` to change the output buffer size."); // Copy bbox - uint32_t size_to_copy = sizeof(detection.m_bbox_with_mask); - assert((buffer_offset + size_to_copy) <= buffer.size()); - detection.m_bbox_with_mask.mask = (buffer.data() + buffer_offset + size_to_copy); + uint32_t copied_bytes_amount = 0; + detection.m_bbox_with_mask.mask = (buffer.data() + buffer_offset + detection_size); *(hailo_detection_with_byte_mask_t*)(buffer.data() + buffer_offset) = *(hailo_detection_with_byte_mask_t*)&(detection.m_bbox_with_mask); - buffer_offset += size_to_copy; - copied_bytes_amount += size_to_copy; + buffer_offset += detection_size; + copied_bytes_amount += detection_size; // Calc and copy mask auto status = calc_and_copy_mask(detection, buffer, buffer_offset); CHECK_SUCCESS_AS_EXPECTED(status); - copied_bytes_amount += static_cast(detection.m_bbox_with_mask.mask_size); + copied_bytes_amount += mask_size; m_classes_detections_count[detection.m_class_id]--; return copied_bytes_amount; @@ -268,6 +281,7 @@ Expected Yolov5SegPostProcess::copy_detection_to_result_buffer(MemoryV hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &buffer) { + auto status = HAILO_SUCCESS; const auto &nms_config = m_metadata->nms_config(); uint32_t ignored_detections_count = 0; uint16_t detections_count = 0; @@ -292,6 +306,10 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu } auto copied_bytes_amount = copy_detection_to_result_buffer(buffer, detection, buffer_offset); + if (HAILO_INSUFFICIENT_BUFFER == copied_bytes_amount.status()) { + status = copied_bytes_amount.status(); + break; + } CHECK_EXPECTED_AS_STATUS(copied_bytes_amount); buffer_offset += copied_bytes_amount.release(); detections_count++; @@ -305,7 +323,7 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu ignored_detections_count, nms_config.max_proposals_per_class); } - return HAILO_SUCCESS; + return status; } } /* namespace net_flow */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp index ae3a7b2a..94816fbd 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp @@ -13,7 +13,7 @@ #include "hailo/hailort.h" #include "net_flow/ops/yolov5_post_process.hpp" #include "transform/transform_internal.hpp" -#include "net_flow/ops/yolov5_seg_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" namespace hailort { @@ -45,23 +45,36 @@ class Yolov5SegPostProcess : public YOLOv5PostProcessOp return m_metadata->inputs_metadata().at(m_metadata->yolov5seg_config().proto_layer_name).shape; }; + template + static void transform__d2h_NHCW_to_NCHW_with_dequantize(SrcType *src_ptr, hailo_3d_image_shape_t shape, + DstType *dst_ptr, hailo_quant_info_t quant_info) + { + assert(nullptr != src_ptr); + assert(nullptr != dst_ptr); + + uint32_t width_size = shape.width; + for (uint32_t r = 0; r < shape.height; r++) { + for (uint32_t c = 0; c < shape.features; c++) { + SrcType *src = src_ptr + shape.features * shape.width * r + shape.width * c; + DstType *dst = dst_ptr + shape.width * shape.height * c + shape.width * r; + Quantization::dequantize_output_buffer(src, dst, width_size, quant_info); + } + } + } + // Transform proto layer - To multiply between the box 
mask coefficients (of shape (1, 32)), in the proto layer, // we change the proto layer shape to be (features=32, height * width) template void transform_proto_layer(SrcType *src_buffer, const hailo_quant_info_t &quant_info) { hailo_3d_image_shape_t shape = get_proto_layer_shape(); - - // TODO: HRT-11734 Improve performance - Make both funcs in one run? - Quantization::dequantize_output_buffer(src_buffer, (float32_t*)m_dequantized_proto_buffer.data(), - HailoRTCommon::get_shape_size(shape), quant_info); - TransformContextUtils::transform__d2h_NHCW_to_NCHW((float32_t*)m_dequantized_proto_buffer.data(), &shape, - (float32_t*)m_transformed_proto_buffer.data(), &shape); + transform__d2h_NHCW_to_NCHW_with_dequantize(src_buffer, shape, + (float32_t*)m_transformed_proto_buffer.data(), quant_info); } private: Yolov5SegPostProcess(std::shared_ptr metadata, Buffer &&mask_mult_result_buffer, - Buffer &&resized_mask, Buffer &&transformed_proto_buffer, Buffer &&dequantized_proto_buffer); + Buffer &&resized_mask, Buffer &&transformed_proto_buffer); hailo_status fill_nms_with_byte_mask_format(MemoryView &buffer); void mult_mask_vector_and_proto_matrix(const DetectionBbox &detection); @@ -76,9 +89,7 @@ class Yolov5SegPostProcess : public YOLOv5PostProcessOp Buffer m_mask_mult_result_buffer; Buffer m_resized_mask_to_image_dim; - // TODO: HRT-11734 - Try use one buffer for both actions Buffer m_transformed_proto_buffer; - Buffer m_dequantized_proto_buffer; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp index 788146af..b700ed58 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp @@ -34,8 +34,8 @@ Expected> Yolov8OpMetadata::create(const std::unorde std::string Yolov8OpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolov8_config.image_height, m_yolov8_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolov8_config.image_height), static_cast(m_yolov8_config.image_width)); return config_info; } @@ -43,6 +43,8 @@ hailo_status Yolov8OpMetadata::validate_params() { CHECK_SUCCESS(NmsOpMetadata::validate_params()); + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOV8PostProcessOp: bbox_only is not supported for YOLOV8 model"); + // We go over the inputs metadata and check that it includes all of the regs and clss for (const auto &layer_names : m_yolov8_config.reg_to_cls_inputs) { CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, @@ -98,7 +100,7 @@ hailo_status YOLOV8PostProcessOp::execute(const std::mapsecond); } -template -hailo_bbox_float32_t YOLOV8PostProcessOp::get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, - const hailo_quant_info_t ®_quant_info, SrcType *reg_data, std::vector> &d_matrix, DstType class_confidence) -{ - auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; - auto reg_feature_size = reg_padded_shape.width; - auto reg_idx = (reg_row_size * row) + col; - - // For each HxW - reshape from features to 4 x (features/4) + dequantize - // For 
example - reshape from 64 to 4X16 - 4 vectors of 16 values - for (uint32_t feature = 0; feature < reg_padded_shape.features; feature++) { - auto &tmp_vector = d_matrix.at(feature / (reg_padded_shape.features / NUM_OF_D_VALUES)); - tmp_vector[feature % (reg_padded_shape.features / NUM_OF_D_VALUES)] = Quantization::dequantize_output(reg_data[reg_idx + feature*reg_feature_size], reg_quant_info); - } - - // Performing softmax operation on each of the vectors - for (uint32_t vector_index = 0; vector_index < d_matrix.size(); vector_index++) { - auto &tmp_vector = d_matrix.at(vector_index); - SoftmaxPostProcessOp::softmax(tmp_vector.data(), tmp_vector.data(), tmp_vector.size()); - } - - // Performing dot product on each vector - // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... + 14*F + 15*G - for (uint32_t vector_index = 0; vector_index < NUM_OF_D_VALUES; vector_index++) { - m_d_values_matrix[vector_index] = dot_product(d_matrix.at(vector_index)); - } - - // The decode function extract x_min, y_min, x_max, y_max from d1, d2, d3, d4 - const auto &d1 = m_d_values_matrix.at(0); - const auto &d2 = m_d_values_matrix.at(1); - const auto &d3 = m_d_values_matrix.at(2); - const auto &d4 = m_d_values_matrix.at(3); - auto bbox = decode(d1, d2, d3, d4, col, row, stride); - bbox.score = class_confidence; - return bbox; -} - hailo_bbox_float32_t YOLOV8PostProcessOp::decode(float32_t d1, float32_t d2, float32_t d3, float32_t d4, uint32_t col, uint32_t row, uint32_t stride) const { @@ -190,4 +155,4 @@ float32_t YOLOV8PostProcessOp::dot_product(std::vector &values) } } -} \ No newline at end of file +} diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp index 25d01965..d0433f92 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp @@ -12,61 +12,13 @@ #define _HAILO_YOLOV8_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops/softmax_post_process.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" namespace hailort { namespace net_flow { -struct Yolov8MatchingLayersNames -{ - // Regression layer - std::string reg; - - // Classifications layer - std::string cls; - - uint32_t stride; -}; - -struct Yolov8PostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - // A vector off two strings that represents the relations between the outputs names. 
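The Yolov8MatchingLayersNames/Yolov8PostProcessConfig pair removed here is re-added under ops_metadata/ later in this patch; each entry ties a regression output to its classification output and stride. Purely for orientation, it might be populated as follows (all layer names are hypothetical):

    Yolov8PostProcessConfig cfg;
    cfg.image_height = 640.0f;
    cfg.image_width = 640.0f;
    // One entry per detection head: {regression output, classification output, stride}
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride8",  "net/cls_stride8",  8});
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride16", "net/cls_stride16", 16});
    cfg.reg_to_cls_inputs.push_back({"net/reg_stride32", "net/cls_stride32", 32});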
- std::vector reg_to_cls_inputs; -}; - -class Yolov8OpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const Yolov8PostProcessConfig &yolov8_post_process_config, - const std::string &network_name); - hailo_status validate_format_info() override; - std::string get_op_description() override; - Yolov8PostProcessConfig &yolov8_config() { return m_yolov8_config;}; - -private: - Yolov8PostProcessConfig m_yolov8_config; - Yolov8OpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const Yolov8PostProcessConfig &yolov8_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOV8-Post-Process", network_name, OperationType::YOLOV8) - , m_yolov8_config(yolov8_post_process_config) - {} - - hailo_status validate_params() override; -}; - class YOLOV8PostProcessOp : public NmsPostProcessOp { public: @@ -84,8 +36,44 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp { for (const auto &input_metadata : m_metadata->inputs_metadata()) { m_d_matrix[input_metadata.first] = std::vector>(NUM_OF_D_VALUES, - std::vector(input_metadata.second.padded_shape.features / NUM_OF_D_VALUES)); + std::vector(input_metadata.second.shape.features / NUM_OF_D_VALUES)); + } + } + + template + hailo_bbox_float32_t get_bbox(uint32_t row, uint32_t col, uint32_t stride, const hailo_3d_image_shape_t ®_padded_shape, + const hailo_3d_image_shape_t ®_shape, const hailo_quant_info_t ®_quant_info, SrcType *reg_data, + std::vector> &d_matrix, DstType class_confidence = 0) + { + auto reg_row_size = reg_padded_shape.width * reg_padded_shape.features; // should be the padded values - we use it to get to the relevant row + auto reg_feature_size = reg_padded_shape.width; // Also should be the padded value - we use it to get to the relevant feature + auto reg_idx = (reg_row_size * row) + col; + + // For each HxW - reshape from features to 4 x (features/4) + dequantize + // For example - reshape from 64 to 4X16 - 4 vectors of 16 values + for (uint32_t feature = 0; feature < reg_shape.features; feature++) { + auto &tmp_vector = d_matrix.at(feature / (reg_shape.features / NUM_OF_D_VALUES)); + tmp_vector[feature % (reg_shape.features / NUM_OF_D_VALUES)] = Quantization::dequantize_output(reg_data[reg_idx + feature*reg_feature_size], reg_quant_info); + } + + // Performing softmax operation on each of the vectors + for (uint32_t vector_index = 0; vector_index < d_matrix.size(); vector_index++) { + auto &tmp_vector = d_matrix.at(vector_index); + SoftmaxPostProcessOp::softmax(tmp_vector.data(), tmp_vector.data(), tmp_vector.size()); + } + // Performing dot product on each vector + // (A, B, C, ..., F, G) -> 0*A + 1*B + 2*C + ... 
+ 14*F + 15*G + for (uint32_t vector_index = 0; vector_index < NUM_OF_D_VALUES; vector_index++) { + m_d_values_matrix[vector_index] = dot_product(d_matrix.at(vector_index)); } + // The decode function extract x_min, y_min, x_max, y_max from d1, d2, d3, d4 + const auto &d1 = m_d_values_matrix.at(0); + const auto &d2 = m_d_values_matrix.at(1); + const auto &d3 = m_d_values_matrix.at(2); + const auto &d4 = m_d_values_matrix.at(3); + auto bbox = decode(d1, d2, d3, d4, col, row, stride); + bbox.score = class_confidence; + return bbox; } static const uint32_t CLASSES_START_INDEX = 0; @@ -101,6 +89,8 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp assert(contains(inputs_metadata, layers_names.reg)); assert(contains(inputs_metadata, layers_names.cls)); + const auto ®_shape = inputs_metadata.at(layers_names.reg).shape; + const auto &cls_shape = inputs_metadata.at(layers_names.cls).shape; const auto ®_padded_shape = inputs_metadata.at(layers_names.reg).padded_shape; const auto &cls_padded_shape = inputs_metadata.at(layers_names.cls).padded_shape; const auto ®_quant_info = inputs_metadata.at(layers_names.reg).quant_info; @@ -119,14 +109,14 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp CHECK(buffer_size == cls_buffer.size(), HAILO_INVALID_ARGUMENT, "Failed to extract_detections, cls {} buffer_size should be {}, but is {}", layers_names.cls, buffer_size, cls_buffer.size()); - // Format is NHCW -> each row size is C size * W size + // Format is NHCW -> each row size is (padded C size) * (padded W size) auto cls_row_size = cls_padded_shape.features * cls_padded_shape.width; SrcType *reg_data = (SrcType*)reg_buffer.data(); SrcType *cls_data = (SrcType*)cls_buffer.data(); - for (uint32_t row = 0; row < cls_padded_shape.height; row++) { - for (uint32_t col = 0; col < cls_padded_shape.width; col++) { + for (uint32_t row = 0; row < cls_shape.height; row++) { + for (uint32_t col = 0; col < cls_shape.width; col++) { auto cls_idx = (cls_row_size * row) + col; if (nms_config.cross_classes) { @@ -137,7 +127,7 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp // If passes threshold - get the relevant bbox and add this detection assert(contains(m_d_matrix, layers_names.reg)); auto &d_matrix = m_d_matrix.at(layers_names.reg); - auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, (SrcType*)reg_data, d_matrix, max_id_score_pair.second); m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); m_classes_detections_count[max_id_score_pair.first]++; @@ -153,7 +143,7 @@ class YOLOV8PostProcessOp : public NmsPostProcessOp // If passes threshold - get the relevant bbox and add this detection assert(contains(m_d_matrix, layers_names.reg)); auto &d_matrix = m_d_matrix.at(layers_names.reg); - auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_quant_info, + auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, (SrcType*)reg_data, d_matrix, class_confidence); m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); m_classes_detections_count[curr_class_idx]++; diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp index 8f67829b..98812cd7 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp @@ -32,8 +32,8 @@ Expected> YoloxOpMetadata::create(const std::unorder std::string 
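The relocated get_bbox() above is the YOLOv8 DFL-style head: each of the four box distances is the expectation of a softmax distribution over features/4 bins (16 with the default 64 regression features), i.e. d = sum_b b * softmax(logits)_b. A compact sketch of that reduction, plus the center/stride recovery; decode()'s body is outside this hunk, so the second function is an assumption based on the upstream YOLOv8 formulation:

    #include <algorithm>
    #include <cmath>
    #include <cstddef>
    #include <cstdint>

    // Expectation over softmax bins, computed stably: sum_i i * softmax(logits)_i.
    static float dfl_expectation_sketch(const float *logits, size_t n)
    {
        const float max_val = *std::max_element(logits, logits + n);
        float sum = 0.0f, acc = 0.0f;
        for (size_t i = 0; i < n; i++) {
            const float e = std::exp(logits[i] - max_val);
            sum += e;
            acc += (float)i * e;
        }
        return acc / sum;
    }

    // Assumed decode: the four distances are in stride units around the cell center.
    static void yolov8_box_sketch(float d_left, float d_top, float d_right, float d_bottom,
                                  uint32_t col, uint32_t row, uint32_t stride,
                                  float image_w, float image_h,
                                  float out_yxyx[4]) // y_min, x_min, y_max, x_max
    {
        const float cx = ((float)col + 0.5f) * (float)stride;
        const float cy = ((float)row + 0.5f) * (float)stride;
        out_yxyx[0] = (cy - d_top * (float)stride) / image_h;
        out_yxyx[1] = (cx - d_left * (float)stride) / image_w;
        out_yxyx[2] = (cy + d_bottom * (float)stride) / image_h;
        out_yxyx[3] = (cx + d_right * (float)stride) / image_w;
    }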
YoloxOpMetadata::get_op_description() { auto nms_config_info = get_nms_config_description(); - auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:.2f}, Image width: {:.2f}", - OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, m_yolox_config.image_height, m_yolox_config.image_width); + auto config_info = fmt::format("Op {}, Name: {}, {}, Image height: {:d}, Image width: {:d}", + OpMetadata::get_operation_type_str(m_type), m_name, nms_config_info, static_cast(m_yolox_config.image_height), static_cast(m_yolox_config.image_width)); return config_info; } @@ -41,6 +41,8 @@ hailo_status YoloxOpMetadata::validate_params() { CHECK_SUCCESS(NmsOpMetadata::validate_params()); + CHECK(!nms_config().bbox_only, HAILO_INVALID_ARGUMENT, "YOLOXPostProcessOp: bbox_only is not supported for YOLOX model"); + // Validate regs, clss and objs matching layers have same shape for (const auto &layer_names : m_yolox_config.input_names) { CHECK(contains(m_inputs_metadata, layer_names.reg), HAILO_INVALID_ARGUMENT, @@ -106,7 +108,7 @@ hailo_status YOLOXPostProcessOp::execute(const std::map clear_before_frame(); for (const auto &layers_names_triplet : yolox_config.input_names) { - hailo_status status; + hailo_status status = HAILO_UNINITIALIZED; assert(contains(inputs, layers_names_triplet.cls)); assert(contains(inputs, layers_names_triplet.obj)); assert(contains(inputs, layers_names_triplet.reg)); diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp index 3850f157..64f74277 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp @@ -12,63 +12,13 @@ #define _HAILO_YOLOX_POST_PROCESS_HPP_ #include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" namespace hailort { namespace net_flow { -struct YoloxMatchingLayersNames -{ - // Regression layer - std::string reg; - - // Objectness layer - std::string obj; - - // Classifications layer - std::string cls; -}; - -struct YoloxPostProcessConfig -{ - // The image height. - float32_t image_height = 0; - - // The image width. - float32_t image_width = 0; - - // A vector off three strings that represents the relations between the outputs names. 
- std::vector input_names; -}; - -class YoloxOpMetadata : public NmsOpMetadata -{ -public: - static Expected> create(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloxPostProcessConfig &yolox_post_process_config, - const std::string &network_name); - hailo_status validate_format_info() override; - std::string get_op_description() override; - YoloxPostProcessConfig &yolox_config() { return m_yolox_config;}; - -private: - YoloxPostProcessConfig m_yolox_config; - YoloxOpMetadata(const std::unordered_map &inputs_metadata, - const std::unordered_map &outputs_metadata, - const NmsPostProcessConfig &nms_post_process_config, - const YoloxPostProcessConfig &yolox_post_process_config, - const std::string &network_name) - : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process", network_name, OperationType::YOLOX) - , m_yolox_config(yolox_post_process_config) - {} - - hailo_status validate_params() override; -}; - class YOLOXPostProcessOp : public NmsPostProcessOp { public: diff --git a/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp new file mode 100644 index 00000000..185d04db --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp @@ -0,0 +1,50 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file argmax_op_metadata.hpp + * @brief Argmax op metadata + * + **/ + +#ifndef _HAILO_ARGMAX_OP_METADATA_HPP_ +#define _HAILO_ARGMAX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +constexpr std::size_t ARGMAX_OUTPUT_FEATURES_SIZE {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t ARGMAX_NUMBER_OF_DSTS {1}; + +class ArgmaxOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); + + virtual Expected get_output_vstream_info() override; + +private: + ArgmaxOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name) + : OpMetadata(inputs_metadata, outputs_metadata, "Argmax-Post-Process", network_name, OperationType::ARGMAX) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_ARGMAX_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp new file mode 100644 index 00000000..60971265 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp @@ -0,0 +1,92 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. 
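The NmsPostProcessConfig defined in the new header that begins here gathers every knob the NMS-family ops read. Seeded with typical values it might look like the following; the numbers are illustrative only and entirely model and application dependent:

    NmsPostProcessConfig nms_cfg;
    nms_cfg.nms_score_th = 0.25;           // keep boxes whose objectness * class score exceeds this
    nms_cfg.nms_iou_th = 0.45;             // suppress overlapping boxes above this IoU
    nms_cfg.max_proposals_per_class = 100;
    nms_cfg.number_of_classes = 80;        // e.g. a COCO-trained model
    nms_cfg.cross_classes = false;         // per-class NMS: a box may keep several labels
    nms_cfg.bbox_only = false;             // run full NMS rather than decode-only output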
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file nms_op_metadata.hpp + * @brief NMS op metadata + * + **/ + +#ifndef _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ +#define _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct NmsPostProcessConfig +{ + // User given confidence threshold for a bbox. A bbox will be considered a detection if the + // (objectness * class_score) is higher than the confidence_threshold. + double nms_score_th = 0; + + // User given IoU threshold (intersection over union). This threshold is for performing + // Non-maximum suppression (Removing overlapping boxes). + double nms_iou_th = 0; + + // Maximum number of bboxes per NMS class. + uint32_t max_proposals_per_class = 0; + + // The model's number of classes. (This depends on the dataset the model was trained on). + uint32_t number_of_classes = 0; + + // Toggle background class removal from results + bool background_removal = false; + + // Index of background class for background removal + uint32_t background_removal_index = 0; + + // Indicates whether or not NMS performs IoU over different classes for the same box. + // If set to false - NMS won't intersect different classes, and a box could have multiple labels. + bool cross_classes = false; + + // Indicates whether only bbox decoding is performed (without running NMS) + bool bbox_only = false; +}; + +static const float32_t REMOVED_CLASS_SCORE = 0.0f; + +class NmsOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const std::string &network_name, + const OperationType type, + const std::string &name); + virtual ~NmsOpMetadata() = default; + std::string get_nms_config_description(); + hailo_status validate_format_info() override; + NmsPostProcessConfig &nms_config() { return m_nms_config;}; + hailo_nms_info_t nms_info(); + std::string get_op_description() override; + static hailo_format_t expand_output_format_autos_by_op_type(const hailo_format_t &output_format, OperationType type, bool bbox_only); + + virtual Expected get_output_vstream_info() override; + +protected: + NmsOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const std::string &name, + const std::string &network_name, + const OperationType type) + : OpMetadata(inputs_metadata, outputs_metadata, name, network_name, type), + m_nms_config(nms_post_process_config) + {} + + hailo_status validate_params() override; + +private: + NmsPostProcessConfig m_nms_config; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_NET_FLOW_NMS_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp similarity index 97% rename from hailort/libhailort/src/net_flow/ops/op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp index d07a4440..44b8defb 100644 --- a/hailort/libhailort/src/net_flow/ops/op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp @@ -112,7 +112,7 @@ class OpMetadata virtual hailo_status validate_params() = 0; }; -} -} +} /* namespace net_flow */ +} /* namespace hailort */ -#endif \ No newline at end of file +#endif /* _HAILO_OP_META_DATA_HPP_ */ \ No 
newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp new file mode 100644 index 00000000..20a77997 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp @@ -0,0 +1,49 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file softmax_op_metadata.hpp + * @brief: Softmax op metadata + * + **/ + +#ifndef _HAILO_SOFTMAX_OP_METADATA_HPP_ +#define _HAILO_SOFTMAX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +constexpr std::size_t SOFTMAX_NUMBER_OF_SRCS {1}; +constexpr std::size_t SOFTMAX_NUMBER_OF_DSTS {1}; + +class SoftmaxOpMetadata : public OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + static hailo_format_t expand_output_format_autos(const hailo_format_t &output_format, const hailo_format_t &input_format); + + virtual Expected get_output_vstream_info() override; + +private: + SoftmaxOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const std::string &network_name) + : OpMetadata(inputs_metadata, outputs_metadata, "Softmax-Post-Process", network_name, OperationType::SOFTMAX) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_SOFTMAX_OP_METADATA_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp new file mode 100644 index 00000000..bb952357 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp @@ -0,0 +1,80 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file ssd_op_metadata.hpp + * @brief SSD op metadata + * + **/ + +#ifndef _HAILO_SSD_OP_METADATA_HPP_ +#define _HAILO_SSD_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct SSDPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + uint32_t centers_scale_factor = 0; + + uint32_t bbox_dimensions_scale_factor = 0; + + uint32_t ty_index = 0; + uint32_t tx_index = 0; + uint32_t th_index = 0; + uint32_t tw_index = 0; + + std::map reg_to_cls_inputs; + + // A vector of anchors, each element in the vector represents the anchors for a specific layer + // Each layer anchors vector is structured as {w,h} pairs. + // Each anchor is mapped by 2 keys: + // 1. reg input + // 2. 
cls input + std::map> anchors; + + // Indicates whether boxes should be normalized (and clipped) + bool normalize_boxes = false; +}; + +class SSDOpMetadata : public NmsOpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const SSDPostProcessConfig &ssd_post_process_config, + const std::string &network_name); + std::string get_op_description() override; + hailo_status validate_format_info() override; + SSDPostProcessConfig &ssd_config() { return m_ssd_config;}; + +private: + SSDPostProcessConfig m_ssd_config; + SSDOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const SSDPostProcessConfig &ssd_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "SSD-Post-Process", network_name, OperationType::SSD) + , m_ssd_config(ssd_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_SSD_OP_METADATA_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp new file mode 100644 index 00000000..191f6386 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp @@ -0,0 +1,48 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov5_bbox_only_op_metadata.hpp + * @brief YOLOv5 Bbox Only Post-Process op metadata + **/ + +#ifndef _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ +#define _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ + +#include "hailo/hailort.h" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +class Yolov5BboxOnlyOpMetadata : public Yolov5OpMetadata +{ +public: + static Expected> create(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolov5_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + virtual Expected get_output_vstream_info() override; + +private: + Yolov5BboxOnlyOpMetadata(const std::unordered_map &inputs_metadata, + const std::unordered_map &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloPostProcessConfig &yolo_config, + const std::string &network_name) + : Yolov5OpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOv5Bbox-Only-Post-Process", + network_name, yolo_config, OperationType::YOLOV5) + {} + +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV5_BBOX_ONLY_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp similarity index 91% rename from hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp index 145f84eb..caf70632 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_op_metadata.hpp +++ 
b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp @@ -10,7 +10,7 @@ #ifndef _HAILO_YOLO_OP_METADATA_HPP_ #define _HAILO_YOLO_OP_METADATA_HPP_ -#include "net_flow/ops/op_metadata.hpp" +#include "net_flow/ops_metadata/op_metadata.hpp" namespace hailort { @@ -41,6 +41,7 @@ class Yolov5OpMetadata : public NmsOpMetadata std::string get_op_description() override; hailo_status validate_format_info() override; YoloPostProcessConfig &yolov5_config() { return m_yolov5_config;}; + virtual Expected get_output_vstream_info() override; protected: Yolov5OpMetadata(const std::unordered_map &inputs_metadata, @@ -56,15 +57,14 @@ class Yolov5OpMetadata : public NmsOpMetadata hailo_status validate_params() override; -private: YoloPostProcessConfig m_yolov5_config; }; -} // namespace net_flow -} // namespace hailort +} /* namespace net_flow */ +} /* namespace hailort */ -#endif // _HAILO_YOLOV5_OP_METADATA_HPP_ +#endif /* _HAILO_YOLO_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp similarity index 92% rename from hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp rename to hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp index 60398356..39213a7e 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp @@ -11,7 +11,7 @@ #define _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ #include "hailo/hailort.h" -#include "net_flow/ops/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" namespace hailort { @@ -22,6 +22,7 @@ struct YoloV5SegPostProcessConfig { // User given mask threshold. A pixel will be considered part of the mask if its value is higher than the mask_threshold. double mask_threshold; + uint32_t max_accumulated_mask_size; std::string proto_layer_name; }; @@ -38,6 +39,7 @@ class Yolov5SegOpMetadata : public Yolov5OpMetadata std::string get_op_description() override; YoloV5SegPostProcessConfig &yolov5seg_config() { return m_yolo_seg_config;}; virtual Expected get_output_vstream_info() override; + hailo_status validate_params() override; private: Yolov5SegOpMetadata(const std::unordered_map &inputs_metadata, @@ -57,4 +59,4 @@ class Yolov5SegOpMetadata : public Yolov5OpMetadata } /* namespace hailort */ } /* namespace net_flow */ -#endif /* _HAILO_YOLOV5_SEG_POST_PROCESS_HPP_ */ \ No newline at end of file +#endif /* _HAILO_YOLOV5_SEG_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp new file mode 100644 index 00000000..078cbfe4 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_op_metadata.hpp + * @brief YOLOV8 op metadata + **/ +#ifndef _HAILO_YOLOV8_OP_METADATA_HPP_ +#define _HAILO_YOLOV8_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct Yolov8MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Classifications layer + std::string cls; + + uint32_t stride; +}; + +struct Yolov8PostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. 
diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp new file mode 100644 index 00000000..078cbfe4 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp @@ -0,0 +1,71 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolov8_op_metadata.hpp + * @brief YOLOV8 op metadata + **/ +#ifndef _HAILO_YOLOV8_OP_METADATA_HPP_ +#define _HAILO_YOLOV8_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct Yolov8MatchingLayersNames +{ + // Regression layer + std::string reg; + + // Classifications layer + std::string cls; + + uint32_t stride; +}; + +struct Yolov8PostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector of two strings that represents the relations between the output names. + std::vector<Yolov8MatchingLayersNames> reg_to_cls_inputs; +}; + +class Yolov8OpMetadata : public NmsOpMetadata +{ +public: + static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + Yolov8PostProcessConfig &yolov8_config() { return m_yolov8_config;}; + +private: + Yolov8PostProcessConfig m_yolov8_config; + Yolov8OpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const Yolov8PostProcessConfig &yolov8_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOV8-Post-Process", network_name, OperationType::YOLOV8) + , m_yolov8_config(yolov8_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOV8_OP_METADATA_HPP_ */ \ No newline at end of file
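Since `reg_to_cls_inputs` pairs outputs purely by name, here is a hedged sketch of filling the config above; the layer names and strides are invented for a hypothetical 640x640 model:

```cpp
#include <string>
#include <vector>

// Sketch only: pair each regression output with its classification output
// and stride, as Yolov8PostProcessConfig above expects.
hailort::net_flow::Yolov8PostProcessConfig make_example_yolov8_config()
{
    hailort::net_flow::Yolov8PostProcessConfig config{};
    config.image_height = 640;
    config.image_width = 640;
    config.reg_to_cls_inputs = {
        {"yolov8n/conv41", "yolov8n/conv42", 8},  // high-resolution head
        {"yolov8n/conv52", "yolov8n/conv53", 16},
        {"yolov8n/conv62", "yolov8n/conv63", 32}, // low-resolution head
    };
    return config;
}
```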
diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp new file mode 100644 index 00000000..4b0ada23 --- /dev/null +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp @@ -0,0 +1,72 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file yolox_op_metadata.hpp + * @brief YOLOX op metadata + **/ +#ifndef _HAILO_YOLOX_OP_METADATA_HPP_ +#define _HAILO_YOLOX_OP_METADATA_HPP_ + +#include "net_flow/ops_metadata/op_metadata.hpp" + +namespace hailort +{ +namespace net_flow +{ + +struct YoloxMatchingLayersNames +{ + // Regression layer + std::string reg; + + // Objectness layer + std::string obj; + + // Classifications layer + std::string cls; +}; + +struct YoloxPostProcessConfig +{ + // The image height. + float32_t image_height = 0; + + // The image width. + float32_t image_width = 0; + + // A vector of three strings that represents the relations between the output names. + std::vector<YoloxMatchingLayersNames> input_names; +}; + +class YoloxOpMetadata : public NmsOpMetadata +{ +public: + static Expected<std::shared_ptr<OpMetadata>> create(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolox_post_process_config, + const std::string &network_name); + hailo_status validate_format_info() override; + std::string get_op_description() override; + YoloxPostProcessConfig &yolox_config() { return m_yolox_config;}; + +private: + YoloxPostProcessConfig m_yolox_config; + YoloxOpMetadata(const std::unordered_map<std::string, BufferMetaData> &inputs_metadata, + const std::unordered_map<std::string, BufferMetaData> &outputs_metadata, + const NmsPostProcessConfig &nms_post_process_config, + const YoloxPostProcessConfig &yolox_post_process_config, + const std::string &network_name) + : NmsOpMetadata(inputs_metadata, outputs_metadata, nms_post_process_config, "YOLOX-Post-Process", network_name, OperationType::YOLOX) + , m_yolox_config(yolox_post_process_config) + {} + + hailo_status validate_params() override; +}; + +} /* namespace net_flow */ +} /* namespace hailort */ + +#endif /* _HAILO_YOLOX_OP_METADATA_HPP_ */ \ No newline at end of file
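YOLOX splits each head one step further, so the matching entries carry a name triplet instead of a pair. A sketch with invented names:

```cpp
#include <string>
#include <vector>

// Sketch only: every YOLOX head contributes a (regression, objectness,
// classification) triplet to input_names.
hailort::net_flow::YoloxPostProcessConfig make_example_yolox_config()
{
    hailort::net_flow::YoloxPostProcessConfig config{};
    config.image_height = 640;
    config.image_width = 640;
    config.input_names = {
        {"yolox_s/conv70", "yolox_s/conv71", "yolox_s/conv72"}, // reg, obj, cls
        {"yolox_s/conv80", "yolox_s/conv81", "yolox_s/conv82"},
        {"yolox_s/conv90", "yolox_s/conv91", "yolox_s/conv92"},
    };
    return config;
}
```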
Shutting it down"); } else { LOGGER__ERROR("Shutting down the pipeline with status {}", error_status); } @@ -131,6 +133,11 @@ const ElementBuildParams AsyncPipeline::get_build_params() return m_build_params; } +std::shared_ptr> AsyncPipeline::get_pipeline_status() +{ + return m_build_params.pipeline_status; +} + void AsyncPipeline::set_as_multi_planar() { m_is_multi_planar = true; @@ -148,7 +155,7 @@ Expected> AsyncInferRunnerImpl::create(std auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - auto async_pipeline_expected = PipelineBuilder::create_pipeline(net_group, inputs_formats, outputs_formats, timeout, pipeline_status); + auto async_pipeline_expected = AsyncPipelineBuilder::create_pipeline(net_group, inputs_formats, outputs_formats, timeout, pipeline_status); CHECK_EXPECTED(async_pipeline_expected); auto async_infer_runner_ptr = make_shared_nothrow(async_pipeline_expected.release(), pipeline_status); @@ -208,15 +215,16 @@ hailo_status AsyncInferRunnerImpl::start_pipeline() void AsyncInferRunnerImpl::abort() { + std::unique_lock lock(m_mutex); m_is_aborted = true; - m_async_pipeline->shutdown(HAILO_STREAM_ABORTED_BY_USER); + m_async_pipeline->shutdown(HAILO_STREAM_ABORT); return; } Expected AsyncInferRunnerImpl::can_push_buffers() { for (auto &last_element : m_async_pipeline->get_last_elements()) { - auto can_push_buffer = last_element.second->can_push_buffer_upstream(last_element.first); + auto can_push_buffer = last_element.second->can_push_buffer_upstream(); CHECK_EXPECTED(can_push_buffer); if (!can_push_buffer.release()) { return false; @@ -224,7 +232,7 @@ Expected AsyncInferRunnerImpl::can_push_buffers() } for (auto &entry_element : m_async_pipeline->get_entry_elements()) { - auto can_push_buffer = entry_element.second->can_push_buffer_downstream(entry_element.first); + auto can_push_buffer = entry_element.second->can_push_buffer_downstream(); CHECK_EXPECTED(can_push_buffer); if (!can_push_buffer.release()) { return false; @@ -234,28 +242,170 @@ Expected AsyncInferRunnerImpl::can_push_buffers() return true; } -hailo_status AsyncInferRunnerImpl::async_infer() +hailo_status AsyncInferRunnerImpl::set_buffers(std::unordered_map &inputs, + std::unordered_map> &outputs) +{ + for (auto &last_element : m_async_pipeline->get_last_elements()) { + // TODO: handle the non-recoverable case where one buffer is enqueued successfully and the second isn't (HRT-11783) + auto status = last_element.second->enqueue_execution_buffer(outputs.at(last_element.first).first, + outputs.at(last_element.first).second); + CHECK_SUCCESS(status); + } + + for (auto &entry_element : m_async_pipeline->get_entry_elements()) { + entry_element.second->sinks()[0].run_push_async(std::move(inputs.at(entry_element.first))); + } + + return HAILO_SUCCESS; +} + +void AsyncInferRunnerImpl::set_pix_buffer_inputs(std::unordered_map &inputs, hailo_pix_buffer_t userptr_pix_buffer, + TransferDoneCallbackAsyncInfer input_done, const std::string &input_name) +{ + if (1 == userptr_pix_buffer.number_of_planes) { + inputs[input_name] = PipelineBuffer(MemoryView(userptr_pix_buffer.planes[0].user_ptr, userptr_pix_buffer.planes[0].bytes_used), input_done); + } else if (m_async_pipeline->is_multi_planar()) { + // If model is multi-planar + inputs[input_name] = PipelineBuffer(userptr_pix_buffer, input_done); + } else { + // Other cases - return error, as on async flow we do not support copy to new buffer + LOGGER__ERROR("HEF was compiled for single input 
+ +void AsyncInferRunnerImpl::set_pix_buffer_inputs(std::unordered_map<std::string, PipelineBuffer> &inputs, hailo_pix_buffer_t userptr_pix_buffer, + TransferDoneCallbackAsyncInfer input_done, const std::string &input_name) +{ + if (1 == userptr_pix_buffer.number_of_planes) { + inputs[input_name] = PipelineBuffer(MemoryView(userptr_pix_buffer.planes[0].user_ptr, userptr_pix_buffer.planes[0].bytes_used), input_done); + } else if (m_async_pipeline->is_multi_planar()) { + // If model is multi-planar + inputs[input_name] = PipelineBuffer(userptr_pix_buffer, input_done); + } else { + // Other cases - return error, as on async flow we do not support copy to new buffer + LOGGER__ERROR("HEF was compiled for single input layer, while trying to pass non-contiguous planes buffers."); + inputs[input_name] = PipelineBuffer(HAILO_INVALID_OPERATION, input_done); + } + +} + +Expected<hailo_pix_buffer_t> AsyncInferRunnerImpl::convert_dma_pix_buffer_to_userptr_pix_buffer(const hailo_pix_buffer_t &dma_pix_buffer) +{ + hailo_pix_buffer_t userptr_pix_buffer; + userptr_pix_buffer.index = dma_pix_buffer.index; + userptr_pix_buffer.number_of_planes = dma_pix_buffer.number_of_planes; + userptr_pix_buffer.memory_type = HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR; + + for (uint32_t i = 0; i < dma_pix_buffer.number_of_planes; i++ ) { + auto current_plane = dma_pix_buffer.planes[i]; + hailo_dma_buffer_t dma_buffer = {current_plane.fd, current_plane.bytes_used}; + + auto dma_buffer_memview_expected = DmaBufferUtils::mmap_dma_buffer_read(dma_buffer); + CHECK_EXPECTED_AS_STATUS(dma_buffer_memview_expected); + auto dma_buffer_memview = dma_buffer_memview_expected.release(); + + hailo_pix_buffer_plane_t new_plane; + new_plane.bytes_used = current_plane.bytes_used; + new_plane.plane_size = current_plane.plane_size; + new_plane.user_ptr = dma_buffer_memview.data(); + + userptr_pix_buffer.planes[i] = new_plane; + } + + return userptr_pix_buffer; +} + +hailo_status AsyncInferRunnerImpl::run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done) { - hailo_status status = m_async_pipeline->get_build_params().pipeline_status->load(); + std::unique_lock<std::mutex> lock(m_mutex); + hailo_status status = m_async_pipeline->get_pipeline_status()->load(); CHECK_SUCCESS(status, "Can't handle infer request since Pipeline status is {}.", status); - auto pools_are_ready = can_push_buffers(); - CHECK_EXPECTED_AS_STATUS(pools_are_ready); - CHECK(pools_are_ready.release(), HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full."); + TRY(auto are_pools_ready, can_push_buffers()); + CHECK(are_pools_ready, HAILO_QUEUE_IS_FULL, "Can't handle infer request since a queue in the pipeline is full."); + std::unordered_map<std::string, std::pair<MemoryView, TransferDoneCallbackAsyncInfer>> outputs; for (auto &last_element : m_async_pipeline->get_last_elements()) { - assert(contains(m_output_buffers, last_element.first)); - auto output_buffer = m_output_buffers.at(last_element.first); - auto read_done = m_read_dones.at(last_element.first); - // TODO: handle the non-recoverable case where one buffer is enqueued successfully and the second isn't (HRT-11783) - status = last_element.second->enqueue_execution_buffer(output_buffer, read_done); - CHECK_SUCCESS(status); + auto buff_type = bindings.output(last_element.first)->m_pimpl->get_type(); + if (BufferType::DMA_BUFFER == buff_type) { + TRY(auto dma_buffer, bindings.output(last_element.first)->get_dma_buffer(), "Couldn't find output buffer for '{}'", last_element.first); + + TRY(auto dma_buffer_memview, DmaBufferUtils::mmap_dma_buffer_write(dma_buffer)); + + auto output_done = [dma_buffer_memview, dma_buffer=dma_buffer, transfer_done](hailo_status status) { + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_write(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + LOGGER__ERROR("Failed to unmap dma buffer"); + status = HAILO_FILE_OPERATION_FAILURE; + } + transfer_done(status); + }; + std::pair<MemoryView, TransferDoneCallbackAsyncInfer> buffer_cb_pair(dma_buffer_memview, output_done); + outputs[last_element.first] = buffer_cb_pair; + + } else { + TRY(auto buffer, bindings.output(last_element.first)->get_buffer(), "Couldn't find output buffer for '{}'", last_element.first); + + std::pair<MemoryView, TransferDoneCallbackAsyncInfer> buffer_cb_pair(buffer, transfer_done); + outputs[last_element.first] =
buffer_cb_pair; + } } + std::unordered_map<std::string, PipelineBuffer> inputs; for (auto &entry_element : m_async_pipeline->get_entry_elements()) { + auto buff_type = bindings.input(entry_element.first)->m_pimpl->get_type(); + + switch (buff_type) { + case BufferType::VIEW: + { + TRY(auto buffer, bindings.input(entry_element.first)->get_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + inputs[entry_element.first] = PipelineBuffer(buffer, transfer_done); + break; + } + case BufferType::DMA_BUFFER: + { + TRY(auto dma_buffer, bindings.input(entry_element.first)->get_dma_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + + TRY(auto dma_buffer_memview, DmaBufferUtils::mmap_dma_buffer_read(dma_buffer)); + + auto input_done = [dma_buffer_memview, dma_buffer, transfer_done](hailo_status status) { + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_read(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + // Note: we override the status even if it was not success before (but either way it's set to non-success) + LOGGER__ERROR("Failed to unmap dma buffer"); + status = munmap_status; + } + transfer_done(status); + }; + inputs[entry_element.first] = PipelineBuffer(dma_buffer_memview, input_done); + break; + } + case BufferType::PIX_BUFFER: + { + // TODO: handle a case in which the pix_buffer is DMA buffers (HRT-12771) + TRY(auto pix_buffer, bindings.input(entry_element.first)->get_pix_buffer(), "Couldn't find input buffer for '{}'", entry_element.first); + + if (HAILO_PIX_BUFFER_MEMORY_TYPE_DMABUF == pix_buffer.memory_type) { + TRY(auto userptr_pix_buffer, convert_dma_pix_buffer_to_userptr_pix_buffer(pix_buffer)); + + auto input_done = [userptr_pix_buffer, transfer_done, dma_pix_buffer=pix_buffer](hailo_status status) { + for (uint32_t i = 0; i < dma_pix_buffer.number_of_planes; i++ ) { + auto plane_in_dma_buffer = dma_pix_buffer.planes[i]; + hailo_dma_buffer_t dma_buffer = {plane_in_dma_buffer.fd, plane_in_dma_buffer.bytes_used}; + + auto dma_buffer_memview = MemoryView(userptr_pix_buffer.planes[i].user_ptr, userptr_pix_buffer.planes[i].bytes_used); + + auto munmap_status = DmaBufferUtils::munmap_dma_buffer_read(dma_buffer, dma_buffer_memview); + if (HAILO_SUCCESS != munmap_status) { + // Note: we override the status even if it was not success before (but either way it's set to non-success) + LOGGER__ERROR("Failed to unmap dma buffer"); + status = munmap_status; + } + } + transfer_done(status); + }; + + set_pix_buffer_inputs(inputs, userptr_pix_buffer, input_done, entry_element.first); + } else { + set_pix_buffer_inputs(inputs, pix_buffer, transfer_done, entry_element.first); + } + break; + } + + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find input buffer for '{}'", entry_element.first); + } } + + status = set_buffers(inputs, outputs); + CHECK_SUCCESS(status); + return HAILO_SUCCESS; }
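For context, a hypothetical caller of the run() entry point above. Layer names are placeholders, and set_dma_buffer()/set_pix_buffer() are only assumed to exist as the binding-side counterparts of the switch cases above:

```cpp
// Sketch only: bind plain host memory and submit one async job.
hailo_status infer_once(AsyncInferRunnerImpl &runner, ConfiguredInferModel::Bindings &bindings,
    MemoryView input_view, MemoryView output_view)
{
    auto status = bindings.input("input_layer")->set_buffer(input_view);
    CHECK_SUCCESS(status);
    status = bindings.output("output_layer")->set_buffer(output_view);
    CHECK_SUCCESS(status);

    // run() inspects the bound buffer types, maps what needs mapping and
    // pushes one job through the pipeline; the callback fires on completion.
    return runner.run(bindings, [](hailo_status infer_status) {
        if (HAILO_SUCCESS != infer_status) {
            LOGGER__ERROR("Inference failed with status {}", infer_status);
        }
    });
}
```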
@@ -284,32 +434,6 @@ std::unordered_map<std::string, std::shared_ptr<PipelineElement>> AsyncInferRunn return m_async_pipeline->get_last_elements(); } -void AsyncInferRunnerImpl::set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done) -{ - m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); -} - -void AsyncInferRunnerImpl::set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done) -{ - // If only one plane is passed, address it as memview - if (1 == input_buffer.number_of_planes) { - m_input_buffers[input_name] = PipelineBuffer(MemoryView(input_buffer.planes[0].user_ptr, input_buffer.planes[0].bytes_used), write_done); - } else if (m_async_pipeline->is_multi_planar()) { - // If model is multi-planar - m_input_buffers[input_name] = PipelineBuffer(std::move(input_buffer), write_done); - } else { - // Other cases - return error, as on async flow we do not support copy to new buffer - LOGGER__ERROR("HEF was compiled for single input layer, while trying to pass non-contiguous planes buffers."); - m_input_buffers[input_name] = PipelineBuffer(HAILO_INVALID_OPERATION, write_done); - } -} - -void AsyncInferRunnerImpl::set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done) -{ - m_output_buffers[output_name] = std::move(output_buffer); - m_read_dones[output_name] = read_done; -} - std::vector<std::shared_ptr<PipelineElement>> AsyncInferRunnerImpl::get_pipeline() const { return m_async_pipeline->get_pipeline(); diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp index 3467677f..2d0db048 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp @@ -10,9 +10,10 @@ #ifndef _HAILO_ASYNC_INFER_RUNNER_HPP_ #define _HAILO_ASYNC_INFER_RUNNER_HPP_ +#include "hailo/infer_model.hpp" #include "network_group/network_group_internal.hpp" #include "net_flow/pipeline/pipeline.hpp" -#include "net_flow/pipeline/pipeline_builder.hpp" +#include "net_flow/pipeline/async_pipeline_builder.hpp" #include "net_flow/pipeline/vstream_internal.hpp" #include "net_flow/ops/op.hpp" @@ -39,6 +40,8 @@ class AsyncPipeline const std::unordered_map<std::string, std::shared_ptr<PipelineElement>>& get_last_elements() const; const std::shared_ptr<AsyncHwElement> get_async_hw_element(); const ElementBuildParams get_build_params(); + std::shared_ptr<std::atomic<hailo_status>> get_pipeline_status(); + void set_as_multi_planar(); bool is_multi_planar(); @@ -64,21 +67,18 @@ class AsyncInferRunnerImpl virtual ~AsyncInferRunnerImpl(); AsyncInferRunnerImpl(std::shared_ptr<AsyncPipeline> async_pipeline, std::shared_ptr<std::atomic<hailo_status>> pipeline_status); - hailo_status async_infer(); + hailo_status run(ConfiguredInferModel::Bindings &bindings, TransferDoneCallbackAsyncInfer transfer_done); + hailo_status set_buffers(std::unordered_map<std::string, PipelineBuffer> &inputs, + std::unordered_map<std::string, std::pair<MemoryView, TransferDoneCallbackAsyncInfer>> &outputs); void abort(); Expected<bool> can_push_buffers(); - // TODO: consider removing the methods below (needed for unit testing) void add_element_to_pipeline(std::shared_ptr<PipelineElement> pipeline_element); void add_entry_element(std::shared_ptr<PipelineElement> pipeline_element, const std::string &input_name); void add_last_element(std::shared_ptr<PipelineElement> pipeline_element, const std::string &output_name); - void set_input(const std::string &input_name, MemoryView &&input_buffer, TransferDoneCallbackAsyncInfer &write_done); - void set_input(const std::string &input_name, hailo_pix_buffer_t input_buffer, TransferDoneCallbackAsyncInfer &write_done); - void set_output(const std::string &output_name, MemoryView &&output_buffer, TransferDoneCallbackAsyncInfer &read_done); - std::unordered_map<std::string, std::shared_ptr<PipelineElement>> get_entry_elements(); std::unordered_map<std::string, std::shared_ptr<PipelineElement>> get_last_elements(); @@ -91,13 +91,15 @@ class AsyncInferRunnerImpl hailo_status start_pipeline(); hailo_status stop_pipeline(); + static Expected<hailo_pix_buffer_t> convert_dma_pix_buffer_to_userptr_pix_buffer(const hailo_pix_buffer_t &dma_pix_buffer); + void set_pix_buffer_inputs(std::unordered_map<std::string, PipelineBuffer> &inputs, hailo_pix_buffer_t userptr_pix_buffer, +
TransferDoneCallbackAsyncInfer input_done, const std::string &input_name); + + std::shared_ptr<AsyncPipeline> m_async_pipeline; - std::unordered_map<std::string, PipelineBuffer> m_input_buffers; - std::unordered_map<std::string, MemoryView> m_output_buffers; - std::unordered_map<std::string, TransferDoneCallbackAsyncInfer> m_read_dones; volatile bool m_is_activated; volatile bool m_is_aborted; std::shared_ptr<std::atomic<hailo_status>> m_pipeline_status; + std::mutex m_mutex; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp similarity index 74% rename from hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp rename to hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp index 6a6615c2..72f88a30 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp @@ -3,25 +3,26 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file pipeline_builder.cpp + * @file async_pipeline_builder.cpp * @brief Async pipeline builder impl **/ -#include "pipeline_builder.hpp" +#include "async_pipeline_builder.hpp" #include "hailo/hailort.h" #include "net_flow/ops/yolov5_seg_post_process.hpp" +#include "net_flow/ops/yolov5_bbox_only_post_process.hpp" #include "net_flow/ops/yolov8_post_process.hpp" #include "net_flow/ops/argmax_post_process.hpp" #include "net_flow/ops/softmax_post_process.hpp" #include "net_flow/ops/yolox_post_process.hpp" #include "net_flow/ops/ssd_post_process.hpp" - +#include "net_flow/pipeline/vstream_builder.hpp" #include namespace hailort { -Expected<std::unordered_map<std::string, hailo_format_t>> PipelineBuilder::expand_auto_input_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, +Expected<std::unordered_map<std::string, hailo_format_t>> AsyncPipelineBuilder::expand_auto_input_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::unordered_map<std::string, hailo_format_t> &inputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { std::unordered_map<std::string, hailo_format_t> expanded_input_format; @@ -52,7 +53,7 @@ return expanded_input_format; } -Expected<std::unordered_map<std::string, hailo_format_t>> PipelineBuilder::expand_auto_output_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, +Expected<std::unordered_map<std::string, hailo_format_t>> AsyncPipelineBuilder::expand_auto_output_formats(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::unordered_map<std::string, hailo_format_t> &outputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { std::unordered_map<std::string, hailo_format_t> expanded_output_format; @@ -71,7 +72,7 @@ return expanded_output_format; } -hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared_ptr<ConfiguredNetworkGroup> net_group, +hailo_status AsyncPipelineBuilder::create_pre_async_hw_elements_per_input(std::shared_ptr<ConfiguredNetworkGroup> net_group, const std::vector<std::string> &stream_names, const std::unordered_map<std::string, hailo_format_t> &inputs_formats, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos, std::shared_ptr<AsyncPipeline> async_pipeline) { @@ -82,8 +83,11 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared std::shared_ptr<PixBufferElement> multi_plane_splitter = nullptr; std::shared_ptr<PipelineElement> last_element_connected_to_pipeline = nullptr; - auto entry_queue_elem_expected = add_push_queue_element(PipelineObject::create_element_name("EntryPushQueueElement", vstream_name, 0), - async_pipeline, nullptr, 0); + auto is_empty = true; + auto interacts_with_hw = true; // We want the entry queue size to be the size of the queues that interact with HW + auto is_entry = true; + auto entry_queue_elem_expected = add_push_queue_element(PipelineObject::create_element_name("EntryPushQEl", vstream_name, 0), + async_pipeline, 0, is_empty, interacts_with_hw, nullptr, 0, is_entry); CHECK_EXPECTED_AS_STATUS(entry_queue_elem_expected); auto entry_queue_elem =
entry_queue_elem_expected.release(); async_pipeline->add_entry_element(entry_queue_elem, vstream_name); @@ -114,9 +118,11 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared auto sink_index = static_cast(sink_index_expected.release()); if(is_multi_planar) { + is_empty = true; + interacts_with_hw = false; auto post_split_push_queue = add_push_queue_element( - PipelineObject::create_element_name("PostSplitPushQueue", stream_name, sink_index), - async_pipeline, nullptr); + PipelineObject::create_element_name("PostSplitPushQEl", stream_name, sink_index), + async_pipeline, 0, is_empty, interacts_with_hw, nullptr); CHECK_EXPECTED_AS_STATUS(post_split_push_queue); CHECK_SUCCESS(PipelinePad::link_pads(multi_plane_splitter, post_split_push_queue.value(), plane_index++)); @@ -133,17 +139,18 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared CHECK_EXPECTED_AS_STATUS(should_transform); if (should_transform.value()) { - bool is_dma_able = true; auto pre_infer_elem = PreInferElement::create(input_stream_info.shape, src_format, input_stream_info.hw_shape, input_stream_info.format, { input_stream_info.quant_info }, - PipelineObject::create_element_name("PreInferElement", stream_name, input_stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_dma_able, async_pipeline); + PipelineObject::create_element_name("PreInferEl", stream_name, input_stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(pre_infer_elem); async_pipeline->add_element_to_pipeline(pre_infer_elem.value()); CHECK_SUCCESS(PipelinePad::link_pads(last_element_connected_to_pipeline, pre_infer_elem.value())); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", stream_name, input_stream_info.index), - async_pipeline, pre_infer_elem.value()); + is_empty = false; + interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl", stream_name, input_stream_info.index), + async_pipeline, input_stream_info.hw_frame_size, is_empty, interacts_with_hw, pre_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), async_pipeline->get_async_hw_element(), 0, sink_index)); @@ -155,7 +162,7 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements_per_input(std::shared return HAILO_SUCCESS; } -hailo_status PipelineBuilder::create_pre_async_hw_elements(std::shared_ptr net_group, +hailo_status AsyncPipelineBuilder::create_pre_async_hw_elements(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) { @@ -170,19 +177,24 @@ hailo_status PipelineBuilder::create_pre_async_hw_elements(std::shared_ptr> PipelineBuilder::add_post_infer_element(const hailo_format_t &output_format, +Expected> AsyncPipelineBuilder::add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, - bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index) + std::shared_ptr final_elem, const uint32_t final_elem_source_index) { - auto 
queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement", final_elem->name(), static_cast(final_elem_source_index)), - async_pipeline, final_elem, final_elem_source_index); + auto pre_transform_frame_size = (HailoRTCommon::is_nms(src_format.order)) ? + HailoRTCommon::get_nms_hw_frame_size(nms_info) : HailoRTCommon::get_periph_frame_size(src_image_shape, src_format); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl", final_elem->name(), + static_cast(final_elem_source_index)), async_pipeline, pre_transform_frame_size, is_empty, interacts_with_hw, + final_elem, final_elem_source_index); CHECK_EXPECTED(queue_elem); auto post_infer_elem = PostInferElement::create(src_image_shape, src_format, dst_image_shape, output_format, - dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferElement", + dst_quant_infos, nms_info, PipelineObject::create_element_name("PostInferEl", final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), - PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(post_infer_elem); async_pipeline->add_element_to_pipeline(post_infer_elem.value()); @@ -191,10 +203,11 @@ Expected> PipelineBuilder::add_post_infer_elem return post_infer_elem.release(); } -Expected> PipelineBuilder::add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index) +Expected> AsyncPipelineBuilder::add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr final_elem, const uint32_t final_elem_source_index, bool is_entry) { - auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline->get_build_params(), async_pipeline, PipelineDirection::PUSH); + auto push_queue_elem = AsyncPushQueueElement::create(queue_name, async_pipeline->get_build_params(), frame_size, + is_empty, interacts_with_hw, async_pipeline, is_entry); CHECK_EXPECTED(push_queue_elem); async_pipeline->add_element_to_pipeline(push_queue_elem.value()); @@ -207,16 +220,16 @@ Expected> PipelineBuilder::add_push_queue return push_queue_elem.release(); } -Expected> PipelineBuilder::add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(nms_to_detections_element); async_pipeline->add_element_to_pipeline(nms_to_detections_element.value()); @@ -225,16 +238,16 @@ Expected> PipelineBuilder::add_nm return 
nms_to_detections_element.release(); } -Expected> PipelineBuilder::add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(remove_overlapping_bboxes_element); async_pipeline->add_element_to_pipeline(remove_overlapping_bboxes_element.value()); @@ -243,16 +256,16 @@ Expected> PipelineBuilder::add_r return remove_overlapping_bboxes_element; } -Expected> PipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, +Expected> AsyncPipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); - auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), output_format, metadata->nms_config(), + auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(fill_nms_format_element); async_pipeline->add_element_to_pipeline(fill_nms_format_element.value()); @@ -261,11 +274,11 @@ Expected> PipelineBuilder::add_fill_nms_fo return fill_nms_format_element; } -Expected> PipelineBuilder::add_last_async_element(std::shared_ptr async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index) +Expected> AsyncPipelineBuilder::add_last_async_element(std::shared_ptr async_pipeline, + const std::string &output_format_name, size_t frame_size, std::shared_ptr final_elem, const uint32_t final_elem_source_index) { - auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncElement", - final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), async_pipeline); + auto last_async_element = LastAsyncElement::create(PipelineObject::create_element_name("LastAsyncEl", + final_elem->name(), static_cast(final_elem_source_index)), async_pipeline->get_build_params(), frame_size, async_pipeline); CHECK_EXPECTED(last_async_element); async_pipeline->add_element_to_pipeline(last_async_element.value()); @@ -276,7 +289,7 @@ Expected> 
PipelineBuilder::add_last_async_elem return last_async_element.release(); } -Expected> PipelineBuilder::get_output_format_from_edge_info_name(const std::string &edge_info_name, +Expected> AsyncPipelineBuilder::get_output_format_from_edge_info_name(const std::string &edge_info_name, const std::unordered_map &outputs_formats) { for (auto &output_format : outputs_formats) { @@ -287,19 +300,21 @@ Expected> PipelineBuilder::get_output_for return make_unexpected(HAILO_NOT_FOUND); } -hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_stream_name, std::shared_ptr async_pipeline, +hailo_status AsyncPipelineBuilder::add_output_demux_flow(const std::string &output_stream_name, std::shared_ptr async_pipeline, const std::unordered_map &outputs_formats, std::shared_ptr net_group, const std::unordered_map &named_stream_infos) { + CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); + const auto &stream_info = named_stream_infos.at(output_stream_name); + auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); CHECK_EXPECTED_AS_STATUS(source_index); - const bool is_dma_able_hw_async = true; - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); - CHECK_SUCCESS(status); - CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); - const auto &stream_info = named_stream_infos.at(output_stream_name); + auto is_empty = false; + auto interacts_with_hw = true; + auto hw_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_post_hw", stream_info.name, stream_info.index), + async_pipeline, stream_info.hw_frame_size, is_empty, interacts_with_hw, async_pipeline->get_async_hw_element(), *source_index); + CHECK_EXPECTED_AS_STATUS(hw_queue_elem); auto layer_info = net_group->get_layer_info(output_stream_name); CHECK_EXPECTED_AS_STATUS(layer_info); @@ -311,20 +326,22 @@ hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_st CHECK_ARG_NOT_NULL(demuxer_ptr); auto demux_elem = TransformDemuxElement::create(demuxer_ptr, - PipelineObject::create_element_name("TransformDemuxElement", output_stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + PipelineObject::create_element_name("TransformDemuxEl", output_stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(demux_elem); async_pipeline->add_element_to_pipeline(demux_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(async_pipeline->get_async_hw_element(), demux_elem.value(), *source_index, 0)); + CHECK_SUCCESS(PipelinePad::link_pads(hw_queue_elem.value(), demux_elem.value())); uint8_t i = 0; for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { auto output_format_expected = get_output_format_from_edge_info_name(edge_info.name, outputs_formats); CHECK_EXPECTED_AS_STATUS(output_format_expected); - auto demux_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_demux", edge_info.name, i), async_pipeline, - demux_elem.value(), i); + is_empty = false; + interacts_with_hw = false; + auto demux_queue_elem = 
add_push_queue_element(PipelineObject::create_element_name("PushQEl_demux", edge_info.name, i), async_pipeline, + edge_info.hw_frame_size, is_empty, interacts_with_hw, demux_elem.value(), i); CHECK_EXPECTED_AS_STATUS(demux_queue_elem); auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, @@ -332,17 +349,20 @@ hailo_status PipelineBuilder::add_output_demux_flow(const std::string &output_st CHECK_EXPECTED_AS_STATUS(should_transform); if (should_transform.value()) { - status = demux_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, i); - CHECK_SUCCESS(status); - auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, - async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, true, demux_queue_elem.value()); + auto post_infer_elem = add_post_infer_element(output_format_expected.value().second, edge_info.nms_info, + async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, demux_queue_elem.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_infer_elem.value()); + auto post_transform_frame_size = (HailoRTCommon::is_nms(edge_info.format.order)) ? + HailoRTCommon::get_nms_host_frame_size(edge_info.nms_info, output_format_expected.value().second) : + HailoRTCommon::get_frame_size(edge_info.shape, output_format_expected.value().second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, post_transform_frame_size, + post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, demux_queue_elem.value()); + auto last_async_element = add_last_async_element(async_pipeline, output_format_expected.value().first, edge_info.hw_frame_size, + demux_queue_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } i++; @@ -350,7 +370,7 @@ return HAILO_SUCCESS; } -Expected<bool> PipelineBuilder::should_transform(const hailo_stream_info_t &stream_info, const std::vector<hailo_quant_info_t> &stream_quant_infos, +Expected<bool> AsyncPipelineBuilder::should_transform(const hailo_stream_info_t &stream_info, const std::vector<hailo_quant_info_t> &stream_quant_infos, const hailo_format_t &output_format) { auto should_transform = OutputTransformContext::is_transformation_required(stream_info.hw_shape, @@ -359,19 +379,10 @@ return should_transform.release(); }
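The ternary above is a pattern this builder now repeats at several call sites: buffer and queue sizes are derived from the layer's format order. A condensed sketch of the rule, using the HailoRTCommon helpers already referenced in this diff (the wrapper function itself is invented):

```cpp
// Sketch only: the frame-size rule used across this builder. NMS layers get
// their sizes from the NMS metadata; everything else from shape and format.
size_t post_transform_frame_size(const hailo_3d_image_shape_t &shape, const hailo_format_t &host_format,
    const hailo_nms_info_t &nms_info, const hailo_format_t &hw_format)
{
    return HailoRTCommon::is_nms(hw_format.order) ?
        HailoRTCommon::get_nms_host_frame_size(nms_info, host_format) :
        HailoRTCommon::get_frame_size(shape, host_format);
}
```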
-hailo_status PipelineBuilder::add_nms_fuse_flow(const std::vector<std::string> &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_fuse_flow(const std::vector<std::string> &output_streams_names, const std::pair<std::string, hailo_format_t> &output_format, std::shared_ptr<AsyncPipeline> async_pipeline, const std::unordered_map<std::string, hailo_stream_info_t> &named_stream_infos) { - const bool is_dma_able_hw_async = true; - for (const auto &stream_name : output_streams_names) { - auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); - CHECK_EXPECTED_AS_STATUS(output_index); - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *output_index); - CHECK_SUCCESS(status); - } - std::vector<hailo_nms_info_t> nms_infos; nms_infos.reserve(output_streams_names.size()); hailo_stream_info_t first_defused_stream_info = {}; @@ -388,9 +399,8 @@ hailo_status PipelineBuilder::add_nms_fuse_flow(const std::vector & // To get the fused layer name and src stream format, we use the stream info of one of the defuses auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; - bool is_last_copy_element = true; - auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + auto nms_elem = NmsMuxElement::create(nms_infos, PipelineObject::create_element_name("NmsMuxEl", fused_layer_name, 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(nms_elem); async_pipeline->add_element_to_pipeline(nms_elem.value()); @@ -403,8 +413,11 @@ auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); CHECK_EXPECTED_AS_STATUS(output_index); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element(), output_index.value()); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_nms", curr_stream_info.name, curr_stream_info.index), + async_pipeline, curr_stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), output_index.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), nms_elem.value(), 0, i)); @@ -414,30 +427,23 @@ // TODO(HRT-11078): Fix multi qp for fused NMS auto stream_quant_infos = std::vector<hailo_quant_info_t>(1, first_defused_stream_info.quant_info); - auto should_transform_expected = should_transform(first_defused_stream_info, stream_quant_infos, output_format.second); - CHECK_EXPECTED_AS_STATUS(should_transform_expected); - - if (should_transform_expected.value()) { - auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); + // On NMS models we always need to post-infer + auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); - hailo_status status = nms_elem.value()->fill_buffer_pool(false, async_pipeline->get_build_params().buffer_pool_size_internal, first_defused_stream_info.name); - CHECK_SUCCESS(status); + auto post_infer_elem = add_post_infer_element(output_format.second, fused_layer_nms_info, async_pipeline, + first_defused_stream_info.hw_shape, first_defused_stream_info.format, first_defused_stream_info.shape, stream_quant_infos, nms_elem.value()); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(fused_layer_nms_info, output_format.second); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); -
CHECK_EXPECTED_AS_STATUS(last_async_element); - } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); - CHECK_EXPECTED_AS_STATUS(last_async_element); - } + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + post_infer_elem.value()); + CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_softmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const std::unordered_map &named_stream_infos) { @@ -459,11 +465,14 @@ hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr as auto stream_quant_infos = std::vector(1, stream_info.quant_info); auto post_infer_elem = add_post_infer_element(output_format_expanded, {}, async_pipeline, stream_info.hw_shape, stream_info.format, - stream_info.shape, stream_quant_infos, false, async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); + stream_info.shape, stream_quant_infos, async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_softmax", async_pipeline->get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, post_infer_elem.value()); + auto is_empty = false; + auto interacts_with_hw = false; + const auto post_transform_frame_size = HailoRTCommon::get_frame_size(stream_info.shape, output_format_expanded); + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_softmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, post_transform_frame_size, is_empty, interacts_with_hw, post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); // Updating metadata according to user request @@ -483,20 +492,21 @@ hailo_status PipelineBuilder::add_softmax_flow(std::shared_ptr as auto softmax_op = op_expected.release(); auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, - PipelineObject::create_element_name("SoftmaxPostProcessElement", stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + PipelineObject::create_element_name("SoftmaxPPEl", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(softmax_element); async_pipeline->add_element_to_pipeline(softmax_element.value()); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), softmax_element.value())); - auto last_async_element = add_last_async_element(async_pipeline, updated_output_format.first, softmax_element.value()); + auto last_async_element = add_last_async_element(async_pipeline, updated_output_format.first, post_transform_frame_size, + softmax_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_argmax_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const 
net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const std::unordered_map &named_stream_infos) { @@ -509,14 +519,17 @@ hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr asy auto hw_async_elem_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); CHECK_EXPECTED_AS_STATUS(hw_async_elem_index); - auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_argmax", async_pipeline->get_async_hw_element()->name(), - static_cast(hw_async_elem_index.value())), async_pipeline, async_pipeline->get_async_hw_element()); + auto is_empty = false; + auto interacts_with_hw = true; + auto queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_argmax", async_pipeline->get_async_hw_element()->name(), + static_cast(hw_async_elem_index.value())), async_pipeline, stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), hw_async_elem_index.value()); CHECK_EXPECTED_AS_STATUS(queue_elem); // Updating metadata according to user request auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format);; + updated_outputs_metadata.begin()->second.format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(output_format.second, op_input_format); auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); assert(nullptr != metadata); metadata->set_outputs_metadata(updated_outputs_metadata); @@ -525,23 +538,26 @@ hailo_status PipelineBuilder::add_argmax_flow(std::shared_ptr asy auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); CHECK_EXPECTED_AS_STATUS(op_expected); auto argmax_op = op_expected.release(); - bool is_last_copy_element = true; auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, - PipelineObject::create_element_name("ArgmaxPostProcessElement", stream_name, stream_info.index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + PipelineObject::create_element_name("ArgmaxPPEl", stream_name, stream_info.index), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(argmax_element); async_pipeline->add_element_to_pipeline(argmax_element.value()); CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), argmax_element.value())); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, argmax_element.value()); + const auto post_transform_frame_size = HailoRTCommon::get_frame_size(updated_outputs_metadata.begin()->second.shape, + updated_outputs_metadata.begin()->second.format); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + argmax_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_flow(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const std::shared_ptr &nms_op, const hailo_vstream_info_t &vstream_info, const std::unordered_map &named_stream_infos) { @@ -549,10 +565,15 @@ hailo_status 
PipelineBuilder::add_nms_flow(std::shared_ptr async_ CHECK(contains(named_stream_infos, first_stream_name), HAILO_INTERNAL_FAILURE); const auto &first_stream_info = named_stream_infos.at(first_stream_name); + auto nms_op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); + assert(nullptr != nms_op_metadata); + CHECK(output_format.second.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); - CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, - "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + if(!nms_op_metadata->nms_config().bbox_only){ + CHECK(HailoRTCommon::is_nms(output_format.second.order), HAILO_INVALID_ARGUMENT, + "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + } std::unordered_map inputs_metadata; std::unordered_map outputs_metadata; @@ -580,8 +601,8 @@ hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_ }; outputs_metadata.insert({nms_op->outputs_metadata().begin()->first, output_metadata}); - auto nms_elem = NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), - async_pipeline->get_build_params(), PipelineDirection::PUSH, true, async_pipeline); + auto nms_elem = NmsPostProcessMuxElement::create(nms_op, PipelineObject::create_element_name("NmsPPMuxEl", nms_op->get_name(), 0), + async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED_AS_STATUS(nms_elem); async_pipeline->add_element_to_pipeline(nms_elem.value()); @@ -608,20 +629,30 @@ hailo_status PipelineBuilder::add_nms_flow(std::shared_ptr async_ auto source_id = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(curr_stream_name); CHECK_EXPECTED_AS_STATUS(source_id); - auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element(), source_id.value()); + auto is_empty = false; + auto interacts_with_hw = true; + auto nms_source_queue_elem = add_push_queue_element(PipelineObject::create_element_name("PushQEl_nms", curr_stream_info.name, + curr_stream_info.index), async_pipeline, curr_stream_info.hw_frame_size, is_empty, interacts_with_hw, + async_pipeline->get_async_hw_element(), source_id.value()); CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); nms_elem.value()->add_sink_name(curr_stream_name); } - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, nms_elem.value()); + uint32_t post_transform_frame_size; + if(nms_op_metadata->nms_config().bbox_only){ + post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info, output_format.second); + } else { + post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(vstream_info.nms_shape, output_format.second); + } + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + nms_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_iou_flow( std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_iou_flow( std::shared_ptr 
async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, const std::unordered_map &named_stream_infos) { @@ -630,58 +661,62 @@ hailo_status PipelineBuilder::add_iou_flow( std::shared_ptr async CHECK(contains(named_stream_infos, output_stream_name), HAILO_INTERNAL_FAILURE); const auto &output_stream_info = named_stream_infos.at(output_stream_name); - auto output_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(output_stream_name); - CHECK_EXPECTED_AS_STATUS(output_index); - - auto hw_read_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_hw_read", output_stream_name, output_stream_info.index), - async_pipeline, async_pipeline->get_async_hw_element() , output_index.value()); - CHECK_EXPECTED_AS_STATUS(hw_read_queue_element); - // TODO (HRT-11078): Fix multi qp for PP auto stream_quant_infos = std::vector(1, output_stream_info.quant_info); //output_stream_base->get_quant_infos(); auto post_infer_element = add_post_infer_element(output_format.second, output_stream_info.nms_info, - async_pipeline, output_stream_info.hw_shape, output_stream_info.format, output_stream_info.shape, stream_quant_infos, false, hw_read_queue_element.value()); + async_pipeline, output_stream_info.hw_shape, output_stream_info.format, output_stream_info.shape, stream_quant_infos, + async_pipeline->get_async_hw_element()); CHECK_EXPECTED_AS_STATUS(post_infer_element); - auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_nms_convert", output_stream_name, - output_stream_info.index), async_pipeline, post_infer_element.value()); + auto is_empty = false; + auto interacts_with_hw = false; + const auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream_info.nms_info, output_format.second); + auto pre_nms_convert_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_nms_convert", output_stream_name, + output_stream_info.index), async_pipeline, post_transform_frame_size, is_empty, interacts_with_hw, post_infer_element.value()); CHECK_EXPECTED_AS_STATUS(pre_nms_convert_queue_element); auto nms_to_detections_element = add_nms_to_detections_convert_element(async_pipeline, output_stream_name, output_stream_info.index, - "NmsFormatToDetectionsElement", iou_op_metadata, false, pre_nms_convert_queue_element.value()); + "NmsFormatToDetectionsEl", iou_op_metadata, pre_nms_convert_queue_element.value()); CHECK_EXPECTED_AS_STATUS(nms_to_detections_element); - auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_bboxes_removing", - output_stream_name, output_stream_info.index), async_pipeline, nms_to_detections_element.value()); + auto pre_remove_overlapping_bboxes_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_bboxes_removing", + output_stream_name, output_stream_info.index), async_pipeline, 0, is_empty, interacts_with_hw, nms_to_detections_element.value()); CHECK_EXPECTED_AS_STATUS(pre_remove_overlapping_bboxes_element_queue_element); auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(async_pipeline, output_stream_name, output_stream_info.index, - "RemoveOverlappingBboxesElement", iou_op_metadata, false, pre_remove_overlapping_bboxes_element_queue_element.value()); + 
"RemoveOverlappingBboxesEl", iou_op_metadata, pre_remove_overlapping_bboxes_element_queue_element.value()); CHECK_EXPECTED_AS_STATUS(remove_overlapping_bboxes_element); - auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQueueElement_pre_fill_nms_format", output_stream_name, - output_stream_info.index), async_pipeline, remove_overlapping_bboxes_element.value()); + auto pre_fill_nms_format_element_queue_element = add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_fill_nms_format", + output_stream_name, output_stream_info.index), async_pipeline, 0, is_empty, interacts_with_hw, remove_overlapping_bboxes_element.value()); CHECK_EXPECTED_AS_STATUS(pre_fill_nms_format_element_queue_element); auto fill_nms_format_element = add_fill_nms_format_element(async_pipeline, output_stream_name, output_stream_info.index, - "FillNmsFormatElement", iou_op_metadata, output_format.second, true, pre_fill_nms_format_element_queue_element.value()); + "FillNmsFormatEl", iou_op_metadata, pre_fill_nms_format_element_queue_element.value()); CHECK_EXPECTED_AS_STATUS(fill_nms_format_element); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, fill_nms_format_element.value()); + auto output_vstream_info = iou_op_metadata->get_output_vstream_info(); + CHECK_EXPECTED_AS_STATUS(output_vstream_info); + const auto final_frame_size = HailoRTCommon::get_frame_size(*output_vstream_info, output_format.second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, final_frame_size, fill_nms_format_element.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); return HAILO_SUCCESS; } -hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, +hailo_status AsyncPipelineBuilder::add_nms_flows(std::shared_ptr async_pipeline, const std::vector &output_streams_names, const std::pair &output_format, const net_flow::PostProcessOpMetadataPtr &op_metadata, const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos) { assert(1 <= op_metadata->outputs_metadata().size()); auto updated_outputs_metadata = op_metadata->outputs_metadata(); + auto nms_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != nms_metadata); std::pair expanded_output_format = {output_format.first, - net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type())}; + net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(output_format.second, op_metadata->type(), + nms_metadata->nms_config().bbox_only)}; updated_outputs_metadata.begin()->second.format = expanded_output_format.second; op_metadata->set_outputs_metadata(updated_outputs_metadata); @@ -714,10 +749,19 @@ hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); - auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); - CHECK_EXPECTED_AS_STATUS(op_expected); - op = op_expected.release(); - break; + if (metadata->nms_config().bbox_only) { + auto bbox_only_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != bbox_only_metadata); + auto op_expected = net_flow::YOLOv5BboxOnlyPostProcessOp::create(bbox_only_metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } else { + auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); + 
CHECK_EXPECTED_AS_STATUS(op_expected); + op = op_expected.release(); + break; + } } case (net_flow::OperationType::YOLOV5SEG): { @@ -749,20 +793,11 @@ hailo_status PipelineBuilder::add_nms_flows(std::shared_ptr async return add_nms_flow(async_pipeline, output_streams_names, expanded_output_format, op, output_vstream_info, named_stream_infos); } -hailo_status PipelineBuilder::add_ops_flows(std::shared_ptr async_pipeline, +hailo_status AsyncPipelineBuilder::add_ops_flows(std::shared_ptr async_pipeline, const std::pair &output_format, net_flow::PostProcessOpMetadataPtr &op_metadata, const std::vector &output_streams_names, const std::vector &vstreams_infos, const std::unordered_map &named_stream_infos) { - const bool is_dma_able_hw_async = true; - for (const auto &stream_name : output_streams_names) { - auto source_index = async_pipeline->get_async_hw_element()->get_source_index_from_output_stream_name(stream_name); - CHECK_EXPECTED_AS_STATUS(source_index); - // async_hw element must be filled with buffers like an edge element in order to support large batch sizes - auto status = async_pipeline->get_async_hw_element()->fill_buffer_pool(is_dma_able_hw_async, async_pipeline->get_build_params().buffer_pool_size_edges, *source_index); - CHECK_SUCCESS(status); - } - switch (op_metadata->type()) { case net_flow::OperationType::YOLOX: case net_flow::OperationType::YOLOV8: @@ -784,7 +819,7 @@ hailo_status PipelineBuilder::add_ops_flows(std::shared_ptr async } } -hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptr net_group, +hailo_status AsyncPipelineBuilder::create_post_async_hw_elements(std::shared_ptr net_group, const std::unordered_map &expanded_outputs_formats, std::unordered_map &original_outputs_formats, const std::unordered_map &named_stream_infos, std::shared_ptr async_pipeline) { @@ -852,19 +887,20 @@ hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptrget_async_hw_element()->fill_buffer_pool(true, async_pipeline->get_build_params().buffer_pool_size_edges, *final_elem_source_index); - CHECK_SUCCESS(status); - auto post_infer_elem = add_post_infer_element(output_format.second, first_stream_info.nms_info, async_pipeline, first_stream_info.hw_shape, - first_stream_info.format, first_stream_info.shape, stream_quant_infos, true, async_pipeline->get_async_hw_element(), final_elem_source_index.value()); + first_stream_info.format, first_stream_info.shape, stream_quant_infos, async_pipeline->get_async_hw_element(), final_elem_source_index.value()); CHECK_EXPECTED_AS_STATUS(post_infer_elem); - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_infer_elem.value()); + auto post_transform_frame_size = (HailoRTCommon::is_nms(first_stream_info.format.order)) ? 
+ HailoRTCommon::get_nms_host_frame_size(first_stream_info.nms_info, output_format.second) : + HailoRTCommon::get_frame_size(first_stream_info.shape, output_format.second); + + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, + post_infer_elem.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } else { - auto last_async_element = add_last_async_element(async_pipeline, output_format.first, async_pipeline->get_async_hw_element(), - final_elem_source_index.value()); + auto last_async_element = add_last_async_element(async_pipeline, output_format.first, first_stream_info.hw_frame_size, + async_pipeline->get_async_hw_element(), final_elem_source_index.value()); CHECK_EXPECTED_AS_STATUS(last_async_element); } } @@ -872,7 +908,7 @@ hailo_status PipelineBuilder::create_post_async_hw_elements(std::shared_ptr> PipelineBuilder::create_pipeline(std::shared_ptr net_group, +Expected> AsyncPipelineBuilder::create_pipeline(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, const uint32_t timeout, std::shared_ptr> pipeline_status) @@ -880,7 +916,7 @@ Expected> PipelineBuilder::create_pipeline(std::s std::unordered_map> entry_elements; std::unordered_map> last_elements; - ElementBuildParams build_params; + ElementBuildParams build_params {}; // Buffer pool sizes for pipeline elements should be: // * The minimum of the maximum queue size of all LL streams (input and output) - for edge elements @@ -921,13 +957,9 @@ Expected> PipelineBuilder::create_pipeline(std::s async_pipeline->set_build_params(build_params); - // all elements in async pipeline start as last elements, and in the end of this func all non-last-copy elements will be added buffers - bool is_last_copy_element = true; - auto async_hw_elem = AsyncHwElement::create(named_stream_infos, build_params.timeout, - build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, "AsyncHwElement", build_params.pipeline_status, net_group, - PipelineDirection::PUSH, is_last_copy_element, async_pipeline); + build_params.elem_stats_flags, "AsyncHwEl", build_params.pipeline_status, net_group, + PipelineDirection::PUSH, async_pipeline); CHECK_EXPECTED(async_hw_elem); async_pipeline->add_element_to_pipeline(async_hw_elem.value()); async_pipeline->set_async_hw_element(async_hw_elem.release()); @@ -940,10 +972,23 @@ Expected> PipelineBuilder::create_pipeline(std::s async_pipeline); CHECK_SUCCESS_AS_EXPECTED(status); + print_pipeline_elements_info(async_pipeline); + return async_pipeline; } -Expected> PipelineBuilder::create_multi_plane_splitter_element(const std::string &input_name, +void AsyncPipelineBuilder::print_pipeline_elements_info(std::shared_ptr async_pipeline) +{ + auto async_entry_elements = async_pipeline->get_entry_elements(); + std::vector visited_elements; + visited_elements.reserve(async_pipeline->get_pipeline().size()); + + for (auto &element : async_entry_elements) { + element.second->print_deep_description(visited_elements); + } +} + +Expected> AsyncPipelineBuilder::create_multi_plane_splitter_element(const std::string &input_name, hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline) { CHECK_AS_EXPECTED((HAILO_FORMAT_ORDER_NV12 == order) || (HAILO_FORMAT_ORDER_NV21 == order) || (HAILO_FORMAT_ORDER_I420 == order), @@ -953,7 +998,7 @@ Expected> PipelineBuilder::create_multi_plane_ auto duration_collector_expected 
= DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); CHECK_EXPECTED(duration_collector_expected); - auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferElement", + auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufEl", input_name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(), pipeline_status, order, async_pipeline); CHECK_EXPECTED(planes_splitter); diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp similarity index 89% rename from hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp rename to hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp index 1ddf71c9..6120fe5e 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_builder.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp @@ -3,12 +3,12 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file pipeline_builder.hpp + * @file async_pipeline_builder.hpp * @brief Async Pipeline Builder **/ -#ifndef _HAILO_PIPELINE_BUILDER_HPP_ -#define _HAILO_PIPELINE_BUILDER_HPP_ +#ifndef _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ +#define _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ #include "hailo/hailort.h" #include "network_group/network_group_internal.hpp" @@ -20,10 +20,10 @@ namespace hailort { -class PipelineBuilder final +class AsyncPipelineBuilder final { public: - PipelineBuilder() = delete; + AsyncPipelineBuilder() = delete; static Expected> create_pipeline(std::shared_ptr net_group, const std::unordered_map &inputs_formats, @@ -74,28 +74,31 @@ class PipelineBuilder final static Expected> add_post_infer_element(const hailo_format_t &output_format, const hailo_nms_info_t &nms_info, std::shared_ptr async_pipeline, const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, bool is_last_copy_element, + const hailo_3d_image_shape_t &dst_image_shape, const std::vector &dst_quant_infos, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_last_async_element(std::shared_ptr async_pipeline, - const std::string &output_format_name, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + const std::string &output_format_name, size_t frame_size, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_push_queue_element(const std::string &queue_name, std::shared_ptr async_pipeline, - std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0, + bool is_entry = false); static Expected> add_nms_to_detections_convert_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - 
const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const hailo_format_t &output_format, const bool is_last_copy_element, std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> create_multi_plane_splitter_element(const std::string &input_name, hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline); static Expected should_transform(const hailo_stream_info_t &stream_info, const std::vector &stream_quant_infos, const hailo_format_t &output_format); + + static void print_pipeline_elements_info(std::shared_ptr async_pipeline); }; } /* namespace hailort */ -#endif /* _HAILO_PIPELINE_BUILDER_HPP_ */ +#endif /* _HAILO_ASYNC_PIPELINE_BUILDER_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp new file mode 100644 index 00000000..f0b6e2b5 --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp @@ -0,0 +1,437 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file edge_elements.cpp + * @brief Implementation of the edge elements (sinks and sources) + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/edge_elements.hpp" + +namespace hailort +{ + +PipelinePad &SinkElement::sink() +{ + return m_sinks[0]; +} + +std::vector SinkElement::execution_pads() +{ + std::vector result{&sink()}; + return result; +} + +hailo_status SinkElement::execute_terminate(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +hailo_status SinkElement::execute_dequeue_user_buffers(hailo_status /*error_status*/) +{ + return HAILO_SUCCESS; +} + +Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto got_flush_event = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(got_flush_event); + + // On HwWriteElement the stream always owns the buffer, hence, we set the mode explicitly. 
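+    // (Presumed semantics, not stated in this patch: in OWNING mode the stream allocates and owns
+    // its transfer buffers, so run_push() below can copy from the caller's MemoryView into the
+    // stream. The async flow keeps user-owned buffers instead (see LastAsyncElement), which is
+    // why the mode is pinned explicitly here rather than inferred.)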
+    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    auto hw_write_elem_ptr = make_shared_nothrow<HwWriteElement>(stream, name,
+        duration_collector.release(), std::move(pipeline_status), got_flush_event.release(), pipeline_direction);
+    CHECK_AS_EXPECTED(nullptr != hw_write_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    LOGGER__INFO("Created {}", hw_write_elem_ptr->description());
+
+    return hw_write_elem_ptr;
+}
+
+HwWriteElement::HwWriteElement(std::shared_ptr<InputStreamBase> stream, const std::string &name, DurationCollector &&duration_collector,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction) :
+    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
+    m_stream(stream), m_got_flush_event(got_flush_event)
+{}
+
+Expected<PipelineBuffer> HwWriteElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+hailo_status HwWriteElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    if (PipelineBuffer::Type::FLUSH == buffer.get_type()) {
+        hailo_status flush_status = m_stream->flush();
+        if (HAILO_STREAM_ABORT == flush_status) {
+            LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
+        } else if (HAILO_SUCCESS != flush_status) {
+            LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+        }
+        hailo_status status = m_got_flush_event->signal();
+        CHECK_SUCCESS(status);
+        return HAILO_SUCCESS;
+    }
+
+    m_duration_collector.start_measurement();
+    const auto status = m_stream->write(MemoryView(buffer.data(), buffer.size()));
+    m_duration_collector.complete_measurement();
+
+    if (HAILO_STREAM_ABORT == status) {
+        LOGGER__INFO("Failed to send on input stream {} because stream was aborted", m_stream->to_string());
+        return HAILO_STREAM_ABORT;
+    }
+    CHECK_SUCCESS(status, "{} (H2D) failed with status={}", name(), status);
+
+    return HAILO_SUCCESS;
+}
+
+void HwWriteElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+{
+    LOGGER__ERROR("run_push_async is not supported for {}", name());
+    assert(false);
+}
+
+hailo_status HwWriteElement::execute_activate()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_deactivate()
+{
+    // The flush operation will block until all buffers currently in the pipeline are processed.
+    // We assume that no buffers are sent after the call to deactivate.
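+    // Concretely: a flush() that returns HAILO_STREAM_ABORT or HAILO_STREAM_NOT_ACTIVATED is
+    // treated as a benign race with shutdown and short-circuits with success; any other failure
+    // is logged, and we still fall through to execute_abort() so that pending writers are released.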
+    hailo_status flush_status = m_stream->flush();
+    if (HAILO_STREAM_ABORT == flush_status) {
+        LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
+        return HAILO_SUCCESS;
+    } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) {
+        LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string());
+        return HAILO_SUCCESS;
+    } else if (HAILO_SUCCESS != flush_status) {
+        LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    }
+
+    auto abort_status = execute_abort();
+    CHECK(((abort_status == HAILO_SUCCESS) || (abort_status == HAILO_STREAM_NOT_ACTIVATED)), abort_status,
+        "Failed to abort stream in {}", name());
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort)
+{
+    if (should_clear_abort) {
+        auto status = execute_clear_abort();
+        CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
+            "Failed to clear abort stream in {}", name());
+    }
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_clear()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_flush()
+{
+    hailo_status status = m_got_flush_event->wait(m_stream->get_timeout());
+    CHECK_SUCCESS(status);
+
+    status = m_got_flush_event->reset();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status HwWriteElement::execute_abort()
+{
+    return m_stream->abort_impl();
+}
+
+hailo_status HwWriteElement::execute_clear_abort()
+{
+    return m_stream->clear_abort_impl();
+}
+
+std::string HwWriteElement::description() const
+{
+    std::stringstream element_description;
+    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
+
+    return element_description.str();
+}
+
+Expected<std::shared_ptr<LastAsyncElement>> LastAsyncElement::create(const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
+    hailo_vstream_stats_flags_t vstream_stats_flags, std::shared_ptr<std::atomic<hailo_status>> pipeline_status, size_t queue_size,
+    size_t frame_size, EventPtr shutdown_event, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    auto duration_collector = DurationCollector::create(elem_flags);
+    CHECK_EXPECTED(duration_collector);
+
+    auto is_empty = true; // LastAsync always holds user buffers, therefore it's created empty
+    auto is_dma_able = false;
+    queue_size = queue_size * 2; // Multiplying by 2 to ensure dual-buffering when the edge element is the bottleneck
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, elem_flags, vstream_stats_flags, is_empty, is_dma_able);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto last_async_elem_ptr = make_shared_nothrow<LastAsyncElement>(name,
+        duration_collector.release(), std::move(pipeline_status), buffer_pool.release(), async_pipeline);
+    CHECK_NOT_NULL_AS_EXPECTED(last_async_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    LOGGER__INFO("Created {}", last_async_elem_ptr->description());
+
+    return last_async_elem_ptr;
+}
+
+Expected<std::shared_ptr<LastAsyncElement>> LastAsyncElement::create(const std::string &name,
+    const ElementBuildParams &build_params, size_t frame_size, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    return LastAsyncElement::create(name, build_params.elem_stats_flags, build_params.vstream_stats_flags, build_params.pipeline_status,
+        build_params.buffer_pool_size_edges, frame_size, build_params.shutdown_event, async_pipeline);
+}
+
+LastAsyncElement::LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    BufferPoolPtr buffer_pool, std::shared_ptr<AsyncPipeline>
async_pipeline) :
+    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), PipelineDirection::PUSH, async_pipeline),
+    m_pool(buffer_pool)
+{}
+
+Expected<PipelineBuffer> LastAsyncElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_INVALID_OPERATION);
+}
+
+hailo_status LastAsyncElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+void LastAsyncElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    // Call the callback here, on the LastAsyncElement, because if we wait for the destructor to
+    // call the callbacks, they can be called out of order
+    buffer.call_exec_done();
+}
+
+hailo_status LastAsyncElement::execute_activate()
+{
+    return HAILO_SUCCESS;
+}
+
+hailo_status LastAsyncElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done)
+{
+    return m_pool->enqueue_buffer(mem_view, exec_done);
+}
+
+Expected<bool> LastAsyncElement::can_push_buffer_upstream()
+{
+    return !m_pool->is_full();
+}
+
+SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
+{
+    m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE);
+}
+
+hailo_status LastAsyncElement::execute_dequeue_user_buffers(hailo_status error_status)
+{
+    auto empty_pool_status = empty_buffer_pool(m_pool, error_status, BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT);
+    CHECK_SUCCESS(empty_pool_status);
+
+    return HAILO_SUCCESS;
+}
+
+PipelinePad &SourceElement::source()
+{
+    return m_sources[0];
+}
+
+std::vector<PipelinePad*> SourceElement::execution_pads()
+{
+    std::vector<PipelinePad*> result{&source()};
+    return result;
+}
+
+SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
+{
+    m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK);
+}
+
+Expected<std::shared_ptr<HwReadElement>> HwReadElement::create(std::shared_ptr<OutputStreamBase> stream, const std::string &name,
+    const ElementBuildParams &build_params, PipelineDirection pipeline_direction)
+{
+    // On HwReadElement the stream always owns the buffer, hence we set the mode explicitly.
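+    // Mirrors HwWriteElement on the D2H side: with an OWNING stream, run_pull() can take an
+    // empty buffer from the downstream element's pool and read() directly into it (see
+    // run_pull() below). A minimal creation sketch (variable names assumed, not part of this patch):
+    //     auto hw_read_elem = HwReadElement::create(output_stream, "HwReadEl", build_params);
+    //     CHECK_EXPECTED(hw_read_elem);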
+ auto status = stream->set_buffer_mode(StreamBufferMode::OWNING); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto duration_collector = DurationCollector::create(build_params.elem_stats_flags); + CHECK_EXPECTED(duration_collector); + + auto pipeline_status = build_params.pipeline_status; + + auto shutdown_event = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event); + + auto hw_read_elem_ptr = make_shared_nothrow(stream, name, build_params.timeout, + duration_collector.release(), shutdown_event.release(), std::move(pipeline_status), pipeline_direction); + CHECK_AS_EXPECTED(nullptr != hw_read_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", hw_read_elem_ptr->description()); + + return hw_read_elem_ptr; +} + +HwReadElement::HwReadElement(std::shared_ptr stream, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction) : + SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr), + m_stream(stream), + m_timeout(timeout), + m_shutdown_event(shutdown_event), + m_activation_wait_or_shutdown(stream->get_core_op_activated_event(), shutdown_event) +{} + +uint32_t HwReadElement::get_invalid_frames_count() +{ + return m_stream->get_invalid_frames_count(); +} + +std::string HwReadElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")"; + + return element_description.str(); +} + +hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort) +{ + if (should_clear_abort) { + auto status = execute_clear_abort(); + CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status, + "Failed to clear abort stream in {}", name()); + } + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_clear() +{ + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_flush() +{ + return HAILO_INVALID_OPERATION; +} + +hailo_status HwReadElement::execute_abort() +{ + return m_stream->abort_impl(); +} + +hailo_status HwReadElement::execute_clear_abort() +{ + return m_stream->clear_abort_impl(); +} + +void HwReadElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +hailo_status HwReadElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +Expected HwReadElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + while (true) { + if (!m_stream->is_scheduled()) { + auto status = m_activation_wait_or_shutdown.wait(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + if (HAILO_TIMEOUT == status) { + return make_unexpected(HAILO_NETWORK_GROUP_NOT_ACTIVATED); + } + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto status = m_activation_wait_or_shutdown.wait(std::chrono::milliseconds(0)); + 
if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + } + + MemoryView buffer_view(buffer.value().as_view()); + m_duration_collector.start_measurement(); + auto status = m_stream->read(buffer_view); + if (HAILO_INVALID_FRAME == status) { + m_stream->increase_invalid_frames_count(1); + status = HAILO_SUCCESS; + } + if (HAILO_STREAM_NOT_ACTIVATED == status) { + // Try again + continue; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Reading from stream was aborted!"); + return make_unexpected(HAILO_STREAM_ABORT); + } + CHECK_SUCCESS_AS_EXPECTED(status, "{} (D2H) failed with status={}", name(), status); + m_duration_collector.complete_measurement(); + + return buffer.release(); + } +} + +hailo_status HwReadElement::execute_activate() +{ + CHECK_SUCCESS(m_shutdown_event->reset(), "Failed to reset shutdown event for {}", name()); + + return HAILO_SUCCESS; +} + +hailo_status HwReadElement::execute_deactivate() +{ + auto signal_shutdown_status = m_shutdown_event->signal(); + if (HAILO_SUCCESS != signal_shutdown_status) { + LOGGER__ERROR("Signaling {} shutdown event failed with {}", name(), signal_shutdown_status); + } + + auto abort_status = execute_abort(); + if ((HAILO_SUCCESS != abort_status) && (HAILO_STREAM_NOT_ACTIVATED != abort_status)) { + LOGGER__ERROR("Abort {} failed with {}", name(), abort_status); + return abort_status; + } + + return signal_shutdown_status; +} + + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp new file mode 100644 index 00000000..090a7cad --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp @@ -0,0 +1,143 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file edge_elements.hpp + * @brief all edge elements in the pipeline (sinks and sources) + **/ + +#ifndef _HAILO_EDGE_ELEMENTS_HPP_ +#define _HAILO_EDGE_ELEMENTS_HPP_ + +namespace hailort +{ +// An element with one sink pad only (consumes data) +class SinkElement : public PipelineElementInternal +{ +public: + SinkElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &sink(); + +protected: + virtual std::vector execution_pads() override; + virtual hailo_status execute_terminate(hailo_status error_status) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; +}; + +class HwWriteElement : public SinkElement +{ +public: + static Expected> create(std::shared_ptr stream, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PUSH); + HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction); + virtual ~HwWriteElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_clear() override; + virtual hailo_status execute_flush() override; + virtual hailo_status execute_abort() override; + virtual hailo_status execute_clear_abort() override; + virtual std::string description() const override; + +private: + std::shared_ptr m_stream; + EventPtr m_got_flush_event; +}; + +class LastAsyncElement : public SinkElement +{ +public: + static Expected> create(const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status, size_t queue_size, size_t frame_size, + EventPtr shutdown_event, std::shared_ptr async_pipeline); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + size_t frame_size, std::shared_ptr async_pipeline); + LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + BufferPoolPtr buffer_pool, std::shared_ptr async_pipeline); + virtual ~LastAsyncElement() = default; + + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + + virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) override; + + virtual Expected can_push_buffer_upstream() override; + + virtual hailo_status execute_post_deactivate(bool /*should_clear_abort*/) override { return HAILO_SUCCESS; }; + virtual hailo_status execute_deactivate() 
override { return HAILO_SUCCESS; }; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + + virtual BufferPoolPtr get_buffer_pool() const override + { + return m_pool; + } + +private: + BufferPoolPtr m_pool; +}; + +// An element with one source pad only (generates data) +class SourceElement : public PipelineElementInternal +{ +public: + SourceElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelinePad &source(); + +protected: + virtual std::vector execution_pads() override; +}; + +class HwReadElement : public SourceElement +{ +public: + static Expected> create(std::shared_ptr stream, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL); + HwReadElement(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction); + virtual ~HwReadElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_clear() override; + virtual hailo_status execute_flush() override; + virtual hailo_status execute_abort() override; + virtual hailo_status execute_clear_abort() override; + uint32_t get_invalid_frames_count(); + virtual std::string description() const override; + + PipelinePad &next_pad_downstream() + { + return *m_sources[0].next(); + } + +private: + std::shared_ptr m_stream; + std::chrono::milliseconds m_timeout; + EventPtr m_shutdown_event; + WaitOrShutdown m_activation_wait_or_shutdown; +}; + + +} /* namespace hailort */ + +#endif /* _HAILO_EDGE_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp new file mode 100644 index 00000000..03cdc47e --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp @@ -0,0 +1,771 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file filter_elements.cpp + * @brief Implementation of the filter elements + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/filter_elements.hpp" + +namespace hailort +{ + +FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, + std::chrono::milliseconds timeout, std::shared_ptr async_pipeline) : + IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout) +{} + +hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + return output.status(); + } + CHECK_EXPECTED_AS_STATUS(output); + + hailo_status status = next_pad().run_push(output.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(m_pipeline_direction == PipelineDirection::PUSH); + if (HAILO_SUCCESS != buffer.action_status()) { + auto pool = next_pad().element().get_buffer_pool(); + assert(pool); + + auto buffer_from_pool = pool->get_available_buffer(PipelineBuffer(), m_timeout); + if (HAILO_SUCCESS != buffer_from_pool.status()) { + handle_non_recoverable_async_error(buffer_from_pool.status()); + } else { + buffer_from_pool->set_action_status(buffer.action_status()); + next_pad().run_push_async(buffer_from_pool.release()); + } + return; + } + + auto output = action(std::move(buffer), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + next_pad().run_push_async(output.release()); + } else { + next_pad().run_push_async(PipelineBuffer(output.status())); + } + return; +} + +Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + auto buffer = next_pad().run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("run_pull in FilterElement was shutdown!"); + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + return action(buffer.release(), std::move(optional)); +} + +PipelinePad &FilterElement::next_pad_downstream() +{ + return *m_sources[0].next(); +} + +PipelinePad &FilterElement::next_pad_upstream() +{ + return *m_sinks[0].prev(); +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto transform_context = InputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos); + CHECK_EXPECTED(transform_context, "Failed Creating InputTransformContext"); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto 
pre_infer_elem_ptr = make_shared_nothrow(transform_context.release(), + name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, + async_pipeline); + CHECK_AS_EXPECTED(nullptr != pre_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", pre_infer_elem_ptr->description()); + + return pre_infer_elem_ptr; +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, + std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.pipeline_elements_stats_flags, + pipeline_status, pipeline_direction, async_pipeline); +} + +Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, + build_params.timeout, build_params.elem_stats_flags, build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +PreInferElement::PreInferElement(std::unique_ptr &&transform_context, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_transform_context(std::move(transform_context)) +{} + +Expected PreInferElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) +{ + LOGGER__ERROR("PreInferElement does not support run_pull operation"); + return make_unexpected(HAILO_INVALID_OPERATION); +} + +PipelinePad &PreInferElement::next_pad() +{ + // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) + return *m_sources[0].next(); +} + +std::string PreInferElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; + return element_description.str(); +} + +Expected PreInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + if (PipelineBuffer::Type::FLUSH == input.get_type()) { + return std::move(input); + } + + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto transformed_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == transformed_buffer.status()) { + return make_unexpected(transformed_buffer.status()); + } + + if (!transformed_buffer) { + input.set_action_status(transformed_buffer.status()); + } + CHECK_AS_EXPECTED(HAILO_TIMEOUT != transformed_buffer.status(), HAILO_TIMEOUT, + "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(transformed_buffer); + + auto dst = transformed_buffer->as_view(); + m_duration_collector.start_measurement(); + const auto status = m_transform_context->transform(input.as_view(), dst); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + transformed_buffer->set_action_status(status); + + auto metadata = input.get_metadata(); + + CHECK_SUCCESS_AS_EXPECTED(status); + + // Note: The latency to be measured starts as the input buffer is sent to the InputVStream (via write()) + transformed_buffer->set_metadata(std::move(metadata)); + + return transformed_buffer.release(); +} + +Expected> ConvertNmsToDetectionsElement::create( + const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto convert_nms_to_detections_elem_ptr = make_shared_nothrow(std::move(nms_info), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != convert_nms_to_detections_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", convert_nms_to_detections_elem_ptr->description()); + + return convert_nms_to_detections_elem_ptr; +} + +Expected> ConvertNmsToDetectionsElement::create( + const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return ConvertNmsToDetectionsElement::create(nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +ConvertNmsToDetectionsElement::ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_info(std::move(nms_info)) +{} + +hailo_status ConvertNmsToDetectionsElement::run_push(PipelineBuffer &&buffer, const PipelinePad 
&sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "ConvertNmsToDetectionsElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &ConvertNmsToDetectionsElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected ConvertNmsToDetectionsElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + buffer->set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + + auto detections_pair = net_flow::NmsPostProcessOp::transform__d2h_NMS_DETECTIONS(input.data(), m_nms_info); + auto detections_pipeline_data = make_shared_nothrow + (std::move(detections_pair.first),std::move(detections_pair.second)); + buffer->set_additional_data(detections_pipeline_data); + + m_duration_collector.complete_measurement(); + + return buffer.release(); +} + +Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", fill_nms_format_element->description()); + + return fill_nms_format_element; +} + +Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return FillNmsFormatElement::create(nms_config, name, build_params.elem_stats_flags, + build_params.pipeline_status, build_params.timeout, pipeline_direction, async_pipeline); +} + +FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_config(std::move(nms_config)) +{} + +hailo_status FillNmsFormatElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "FillNmsFormatElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &FillNmsFormatElement::next_pad() +{ + if 
(PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected FillNmsFormatElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer_expected = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer_expected.status()) { + return make_unexpected(buffer_expected.status()); + } + if (!buffer_expected) { + input.set_action_status(buffer_expected.status()); + } + CHECK_EXPECTED(buffer_expected, "{} (D2H) failed with status={}", name(), buffer_expected.status()); + auto buffer = buffer_expected.release(); + + buffer.set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + + auto detections = input.get_metadata().get_additional_data(); + auto dst = buffer.as_view(); + net_flow::NmsPostProcessOp::fill_nms_format_buffer(dst, detections->m_detections, detections->m_detections_classes_count, + m_nms_config); + + m_duration_collector.complete_measurement(); + + return buffer; +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos, nms_info); + CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext"); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), name, + duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", post_infer_elem_ptr->description()); + + return post_infer_elem_ptr; +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, + name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), + pipeline_direction, async_pipeline); +} + +Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + 
return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, + dst_quant_infos, nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_transform_context(std::move(transform_context)) +{} + +Expected PostInferElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "PostInferElement {} does not support run_pull operation", name() + ); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status PostInferElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "PostInferElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &PostInferElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string PostInferElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; + return element_description.str(); +} + +Expected PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) + buffer->set_metadata(input.get_metadata()); + + auto dst = buffer->as_view(); + m_duration_collector.start_measurement(); + const auto status = m_transform_context->transform(input.as_view(), dst); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + buffer->set_action_status(status); + + CHECK_SUCCESS_AS_EXPECTED(status); + + return buffer.release(); +} + +Expected> RemoveOverlappingBboxesElement::create( + const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto convert_nms_removed_overlapping_elem_ptr = make_shared_nothrow(std::move(nms_config), + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != convert_nms_removed_overlapping_elem_ptr, 
HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", convert_nms_removed_overlapping_elem_ptr->description()); + + return convert_nms_removed_overlapping_elem_ptr; +} + +Expected> RemoveOverlappingBboxesElement::create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return RemoveOverlappingBboxesElement::create(nms_config, name, + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, pipeline_direction, async_pipeline); +} + +RemoveOverlappingBboxesElement::RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_nms_config(std::move(nms_config)) +{} + +hailo_status RemoveOverlappingBboxesElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "RemoveOverlappingBboxesElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &RemoveOverlappingBboxesElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string RemoveOverlappingBboxesElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name(); + element_description << " | " << "IoU Threshold: " << this->m_nms_config.nms_iou_th << ")"; + return element_description.str(); +} + +Expected RemoveOverlappingBboxesElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + buffer->set_metadata(input.get_metadata()); + + m_duration_collector.start_measurement(); + auto detections_pipeline_data = input.get_metadata().get_additional_data(); + + net_flow::NmsPostProcessOp::remove_overlapping_boxes(detections_pipeline_data->m_detections, + detections_pipeline_data->m_detections_classes_count, m_nms_config.nms_iou_th); + m_duration_collector.complete_measurement(); + + return buffer.release(); +} + +Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto argmax_elem_ptr = make_shared_nothrow(argmax_op, + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, 
HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", argmax_elem_ptr->description()); + return argmax_elem_ptr; +} + +Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return ArgmaxPostProcessElement::create(argmax_op, name, + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, + pipeline_direction, async_pipeline); +} + +ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_argmax_op(argmax_op) +{} + +Expected ArgmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "ArgmaxPostProcessElement {} does not support run_pull operation", name()); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "ArgmaxPostProcessElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &ArgmaxPostProcessElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string ArgmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_argmax_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + std::map inputs; + std::map outputs; + auto &input_name = m_argmax_op->inputs_metadata().begin()->first; + auto &output_name = m_argmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, buffer->as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_argmax_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + input.set_action_status(post_process_result); + buffer->set_action_status(post_process_result); + + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + + return buffer.release(); +} + +Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr 
async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + auto softmax_elem_ptr = make_shared_nothrow(softmax_op, + name, duration_collector.release(), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + LOGGER__INFO("Created {}", softmax_elem_ptr->description()); + return softmax_elem_ptr; +} + +Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, + build_params.timeout, pipeline_direction, async_pipeline); +} + +SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), + m_softmax_op(softmax_op) +{} + +Expected SoftmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "SoftmaxPostProcessElement {} does not support run_pull operation", name()); + return FilterElement::run_pull(std::move(optional), source); +} + +hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "SoftmaxPostProcessElement {} does not support run_push operation", name()); + return FilterElement::run_push(std::move(buffer), sink); +} + +PipelinePad &SoftmaxPostProcessElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +std::string SoftmaxPostProcessElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_softmax_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +Expected SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + // Buffers are always taken from the next-pad-downstream + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + + if (!buffer) { + input.set_action_status(buffer.status()); + } + CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + + std::map inputs; + std::map outputs; + auto &input_name = m_softmax_op->inputs_metadata().begin()->first; + auto &output_name = m_softmax_op->outputs_metadata().begin()->first; + inputs.insert({input_name, input.as_view()}); + outputs.insert({output_name, buffer->as_view()}); + m_duration_collector.start_measurement(); + auto post_process_result = m_softmax_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + input.set_action_status(post_process_result); + buffer->set_action_status(post_process_result); + + 
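+    // Stamping the post-process status on both the input and the output buffer lets
+    // completion callbacks on either side of this element observe a failed softmax run
+    // instead of a silently dropped frame.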
CHECK_SUCCESS_AS_EXPECTED(post_process_result); + + return buffer.release(); +} + +Expected> CopyBufferElement::create(const std::string &name, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + auto elem_ptr = make_shared_nothrow(name, duration_collector.release(), std::move(pipeline_status), + timeout, pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", elem_ptr->description()); + + return elem_ptr; +} + +CopyBufferElement::CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline) +{} + +PipelinePad &CopyBufferElement::next_pad() +{ + if (PipelineDirection::PUSH == m_pipeline_direction){ + return *m_sources[0].next(); + } + return *m_sinks[0].prev(); +} + +Expected CopyBufferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +{ + CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be passed to CopyBufferElement!"); + + CHECK_AS_EXPECTED(optional.size() == input.size(), HAILO_INVALID_ARGUMENT, "Optional buffer size does not equal the input buffer size!"); + memcpy(optional.data(), input.data(), optional.size()); + + return std::move(optional); +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp new file mode 100644 index 00000000..e5f5abfe --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp @@ -0,0 +1,268 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file filter_elements.hpp + * @brief All filter elements in the pipeline (single input, single output).
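+ *        Each filter element runs a single action() per frame: it acquires its output
+ *        buffer from the pool of the next element downstream, transforms the input into
+ *        it, and forwards the result.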
+ **/ + +#ifndef _HAILO_FILTER_ELEMENTS_HPP_ +#define _HAILO_FILTER_ELEMENTS_HPP_ + +#include "net_flow/pipeline/pipeline_internal.hpp" + +namespace hailort +{ + +class FilterElement : public IntermediateElement +{ +public: + FilterElement(const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::chrono::milliseconds timeout, + std::shared_ptr async_pipeline); + virtual ~FilterElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + +protected: + // The optional buffer functions as an output buffer that the user can write to instead of acquiring a new buffer + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; + + PipelinePad &next_pad_downstream(); + PipelinePad &next_pad_upstream(); + + std::chrono::milliseconds m_timeout; +}; + +class PreInferElement : public FilterElement +{ +public: + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PUSH, std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + std::shared_ptr async_pipeline = nullptr); + PreInferElement(std::unique_ptr &&transform_context, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~PreInferElement() = default; + + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::unique_ptr m_transform_context; +}; + +class RemoveOverlappingBboxesElement : public FilterElement +{ +public: + static Expected> create( + const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static 
Expected> create(const net_flow::NmsPostProcessConfig nms_config, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~RemoveOverlappingBboxesElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + m_nms_config.nms_iou_th = threshold; + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + net_flow::NmsPostProcessConfig m_nms_config; +}; + +class PostInferElement : public FilterElement +{ +public: + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, + const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_info, const hailo_nms_info_t &nms_info, + const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const hailo_3d_image_shape_t &src_image_shape, + const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, + const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + PostInferElement(std::unique_ptr &&transform_context, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~PostInferElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::unique_ptr m_transform_context; +}; + +class ConvertNmsToDetectionsElement : public FilterElement +{ +public: + static Expected> create(const hailo_nms_info_t &nms_info, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> 
pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create( + const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~ConvertNmsToDetectionsElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + hailo_nms_info_t m_nms_info; +}; + +class FillNmsFormatElement : public FilterElement +{ +public: + static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, + const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~FillNmsFormatElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override + { + m_nms_config.max_proposals_per_class = max_proposals_per_class; + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + net_flow::NmsPostProcessConfig m_nms_config; +}; + +class ArgmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr argmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr argmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~ArgmaxPostProcessElement() = default; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, 
const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_argmax_op; +}; + +class SoftmaxPostProcessElement : public FilterElement +{ +public: + static Expected> create(std::shared_ptr softmax_op, + const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr softmax_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline); + virtual ~SoftmaxPostProcessElement() = default; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual PipelinePad &next_pad() override; + virtual std::string description() const override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_softmax_op; +}; + +class CopyBufferElement : public FilterElement +{ +public: + static Expected> create(const std::string &name, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual ~CopyBufferElement() = default; + virtual PipelinePad &next_pad() override; + +protected: + virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; +}; + + + +} /* namespace hailort */ + +#endif /* _HAILO_FILTER_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp index 8f0d8b26..fa19401a 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp @@ -13,9 +13,12 @@ #include "hailo/hailort_common.hpp" #include "hailo/vdevice.hpp" #include "hailo/infer_model.hpp" +#include "vdevice/vdevice_internal.hpp" +#include "hef/hef_internal.hpp" #include "net_flow/pipeline/infer_model_internal.hpp" #include "net_flow/pipeline/async_infer_runner.hpp" + #define WAIT_FOR_ASYNC_IN_DTOR_TIMEOUT (std::chrono::milliseconds(10000)) namespace hailort @@ -86,6 +89,12 @@ void InferModel::InferStream::Impl::set_nms_max_proposals_per_class(uint32_t max m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; } +void InferModel::InferStream::Impl::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + m_nms_max_accumulated_mask_size = max_accumulated_mask_size; + m_vstream_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size; +} + 
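The setter pair above follows the same pattern as the existing NMS knobs: the value is cached on the InferStream::Impl and mirrored into the vstream info, and configure() later pushes it down to the network group. A rough caller-side sketch of the intended flow; VDevice::create(), create_infer_model() and the single-output output() accessor are assumed from the public API, and the HEF path and mask budget are placeholders:

// Hypothetical usage sketch - not part of this change.
#include "hailo/hailort.hpp"
using namespace hailort;

hailo_status configure_with_mask_budget()
{
    auto vdevice = VDevice::create();
    if (!vdevice) { return vdevice.status(); }

    // Placeholder HEF with a single NMS-with-byte-mask output (e.g. an instance-segmentation model)
    auto model = vdevice.value()->create_infer_model("yolov5_seg.hef");
    if (!model) { return model.status(); }

    auto output = model.value()->output();
    if (!output) { return output.status(); }

    // Must be called before configure(); configure() is what forwards the value
    // to the network group (set_nms_max_accumulated_mask_size(edge_name, ...)).
    output.value().set_nms_max_accumulated_mask_size(2 * 1024 * 1024);

    auto configured = model.value()->configure();
    return configured.status();
}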
InferModel::InferStream::InferStream(std::shared_ptr pimpl) : m_pimpl(pimpl) { } @@ -150,6 +159,11 @@ void InferModel::InferStream::set_nms_max_proposals_per_class(uint32_t max_propo m_pimpl->set_nms_max_proposals_per_class(max_proposals_per_class); } +void InferModel::InferStream::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + m_pimpl->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); +} + InferModel::InferModel(VDevice &vdevice, Hef &&hef, std::unordered_map &&inputs, std::unordered_map &&outputs) : m_vdevice(vdevice), m_hef(std::move(hef)), m_inputs(std::move(inputs)), m_outputs(std::move(outputs)), @@ -203,11 +217,8 @@ void InferModel::set_hw_latency_measurement_flags(hailo_latency_measurement_flag m_config_params.latency = latency; } -// TODO: document that this will check validity of format tpyes/orders -Expected InferModel::configure(const std::string &network_name) +Expected InferModel::configure() { - CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); - auto configure_params = m_vdevice.get().create_configure_params(m_hef); CHECK_EXPECTED(configure_params); @@ -227,6 +238,15 @@ Expected InferModel::configure(const std::string &network_ auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); CHECK_EXPECTED(network_groups); + CHECK_AS_EXPECTED(1 == network_groups->size(), HAILO_INVALID_HEF, + "InferModel expects HEF with a single network group. Found {}.", network_groups->size()); + + // TODO (HRT-11293) : Remove this check + TRY(auto internal_queue_size, network_groups.value()[0]->get_min_buffer_pool_size()); + CHECK_AS_EXPECTED(internal_queue_size >= m_config_params.batch_size, HAILO_INVALID_OPERATION, + "Trying to configure a model with a batch={} bigger than internal_queue_size={}, which is not supported. 
Try using a smaller batch.", + m_config_params.batch_size, internal_queue_size); + std::unordered_map inputs_formats; std::unordered_map outputs_formats; @@ -249,6 +269,7 @@ Expected InferModel::configure(const std::string &network_ CHECK_AS_EXPECTED(std::all_of(m_inputs.begin(), m_inputs.end(), [](const auto &input_pair) { return ((input_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && (input_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (input_pair.second.m_pimpl->m_nms_max_accumulated_mask_size == static_cast(INVALID_NMS_CONFIG)) && (input_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))); }), HAILO_INVALID_OPERATION, "NMS config was changed for input"); @@ -256,6 +277,7 @@ Expected InferModel::configure(const std::string &network_ auto &edge_name = output_pair.first; if ((output_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && (output_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && + (output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size == static_cast(INVALID_NMS_CONFIG)) && (output_pair.second.m_pimpl->m_nms_max_proposals_per_class == static_cast(INVALID_NMS_CONFIG))) { continue; } @@ -271,25 +293,51 @@ Expected InferModel::configure(const std::string &network_ auto status = network_groups.value()[0]->set_nms_max_bboxes_per_class(edge_name, output_pair.second.m_pimpl->m_nms_max_proposals_per_class); CHECK_SUCCESS_AS_EXPECTED(status); } + if (output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size != static_cast(INVALID_NMS_CONFIG)) { + auto status = network_groups.value()[0]->set_nms_max_accumulated_mask_size(edge_name, output_pair.second.m_pimpl->m_nms_max_accumulated_mask_size); + CHECK_SUCCESS_AS_EXPECTED(status); + } } auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create(network_groups.value()[0], inputs_formats, outputs_formats, - get_input_names(), get_output_names()); + get_input_names(), get_output_names(), m_vdevice); CHECK_EXPECTED(configured_infer_model_pimpl); + // The hef buffer is being used only when working with the service. + // TODO HRT-12636 - Besides clearing the hef buffer, clear also unnecessary members of Hef object. + // After HRT-12636 is done - The user can configure an infer model only once, with or without the service. 
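+    // Clearing it here releases the (potentially large) in-memory copy of the HEF.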
+ m_hef.pimpl->clear_hef_buffer(); + return ConfiguredInferModel(configured_infer_model_pimpl.release()); } Expected InferModel::configure_for_ut(std::shared_ptr async_infer_runner, - const std::vector &input_names, const std::vector &output_names) + const std::vector &input_names, const std::vector &output_names, + std::shared_ptr net_group) { - auto configure_params = m_vdevice.get().create_configure_params(m_hef); - CHECK_EXPECTED(configure_params); + if (nullptr == net_group) { + auto configure_params = m_vdevice.get().create_configure_params(m_hef); + CHECK_EXPECTED(configure_params); - auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); - CHECK_EXPECTED(network_groups); + for (auto &network_group_name_params_pair : *configure_params) { + for (auto &stream_params_name_pair : network_group_name_params_pair.second.stream_params_by_name) { + stream_params_name_pair.second.flags = HAILO_STREAM_FLAGS_ASYNC; + } - auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create_for_ut(network_groups.value()[0], async_infer_runner, input_names, output_names); + for (auto &network_name_params_pair : network_group_name_params_pair.second.network_params_by_name) { + network_name_params_pair.second.batch_size = m_config_params.batch_size; + } + + network_group_name_params_pair.second.power_mode = m_config_params.power_mode; + network_group_name_params_pair.second.latency = m_config_params.latency; + } + + auto network_groups = m_vdevice.get().configure(m_hef, configure_params.value()); + CHECK_EXPECTED(network_groups); + net_group = network_groups.value()[0]; + } + + auto configured_infer_model_pimpl = ConfiguredInferModelImpl::create_for_ut(net_group, async_infer_runner, input_names, output_names); CHECK_EXPECTED(configured_infer_model_pimpl); return ConfiguredInferModel(configured_infer_model_pimpl.release()); @@ -378,9 +426,9 @@ Expected ConfiguredInferModel::run_async(ConfiguredInferModel::Bi return m_pimpl->run_async(bindings, callback); } -Expected ConfiguredInferModel::get_hw_latency_measurement(const std::string &network_name) +Expected ConfiguredInferModel::get_hw_latency_measurement() { - return m_pimpl->get_hw_latency_measurement(network_name); + return m_pimpl->get_hw_latency_measurement(); } hailo_status ConfiguredInferModel::set_scheduler_timeout(const std::chrono::milliseconds &timeout) @@ -403,14 +451,31 @@ Expected ConfiguredInferModel::get_async_queue_size() return m_pimpl->get_async_queue_size(); } +void ConfiguredInferModel::shutdown() +{ + m_pimpl->abort(); +} + Expected> ConfiguredInferModelImpl::create(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, - const std::vector &input_names, const std::vector &output_names, const uint32_t timeout) + const std::vector &input_names, const std::vector &output_names, VDevice &vdevice, const uint32_t timeout) { auto async_infer_runner = AsyncInferRunnerImpl::create(net_group, inputs_formats, outputs_formats, timeout); CHECK_EXPECTED(async_infer_runner); + auto &hw_elem = async_infer_runner.value()->get_async_pipeline()->get_async_hw_element(); + for (auto &pool : hw_elem->get_hw_interacted_buffer_pools_h2d()) { + if (!pool->is_holding_user_buffers()) { + CHECK_SUCCESS_AS_EXPECTED(pool->map_to_vdevice(vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D)); + } + } + for (auto &pool : hw_elem->get_hw_interacted_buffer_pools_d2h()) { + if (!pool->is_holding_user_buffers()) { + CHECK_SUCCESS_AS_EXPECTED(pool->map_to_vdevice(vdevice, 
HAILO_DMA_BUFFER_DIRECTION_D2H)); + } + } + + auto configured_infer_model_pimpl = make_shared_nothrow(net_group, async_infer_runner.release(), input_names, output_names); + CHECK_NOT_NULL_AS_EXPECTED(configured_infer_model_pimpl, HAILO_OUT_OF_HOST_MEMORY); @@ -527,14 +592,48 @@ hailo_status ConfiguredInferModelImpl::run(ConfiguredInferModel::Bindings bindin hailo_status ConfiguredInferModelImpl::validate_bindings(ConfiguredInferModel::Bindings bindings) { for (const auto &input_name : m_input_names) { - if (BufferType::VIEW == bindings.input(input_name)->m_pimpl->get_type()) { - CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_buffer()); - } else { - CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_pix_buffer()); + auto buffer_type = bindings.input(input_name)->m_pimpl->get_type(); + switch (buffer_type) { + case BufferType::VIEW: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_buffer()); + break; + } + case BufferType::PIX_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_pix_buffer()); + break; + } + case BufferType::DMA_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.input(input_name)->get_dma_buffer()); + break; + } + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find input buffer for '{}'", input_name); } } for (const auto &output_name : m_output_names) { - CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_buffer()); + auto buffer_type = bindings.output(output_name)->m_pimpl->get_type(); + switch (buffer_type) { + case BufferType::VIEW: + { + CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_buffer()); + break; + } + case BufferType::PIX_BUFFER: + { + CHECK(false, HAILO_NOT_SUPPORTED, "pix_buffer isn't supported for outputs in '{}'", output_name); + break; + } + case BufferType::DMA_BUFFER: + { + CHECK_EXPECTED_AS_STATUS(bindings.output(output_name)->get_dma_buffer()); + break; + } + default: + CHECK(false, HAILO_NOT_FOUND, "Couldn't find output buffer for '{}'", output_name); + } } return HAILO_SUCCESS; @@ -547,62 +646,40 @@ Expected ConfiguredInferModelImpl::run_async(ConfiguredInferModel auto job_pimpl = make_shared_nothrow(static_cast(m_input_names.size() + m_output_names.size())); CHECK_NOT_NULL_AS_EXPECTED(job_pimpl, HAILO_OUT_OF_HOST_MEMORY); - AsyncInferJob job(job_pimpl); TransferDoneCallbackAsyncInfer transfer_done = [this, bindings, job_pimpl, callback](hailo_status status) { bool should_call_callback = job_pimpl->stream_done(status); if (should_call_callback) { - { - std::unique_lock lock(m_mutex); - m_ongoing_parallel_transfers--; - } - m_cv.notify_all(); - auto final_status = (m_async_infer_runner->get_pipeline_status() == HAILO_SUCCESS) ? 
job_pimpl->completion_status() : m_async_infer_runner->get_pipeline_status(); - AsyncInferCompletionInfo completion_info(bindings, final_status); + AsyncInferCompletionInfo completion_info(final_status); callback(completion_info); job_pimpl->mark_callback_done(); - } - }; - for (const auto &input_name : m_input_names) { - auto buff_type = bindings.input(input_name)->m_pimpl->get_type(); - if (BufferType::VIEW == buff_type) { - auto buffer = bindings.input(input_name)->get_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); - m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); - } else if (BufferType::PIX_BUFFER == buff_type) { - auto buffer = bindings.input(input_name)->get_pix_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find input buffer for '{}'", input_name); - m_async_infer_runner->set_input(input_name, buffer.release(), transfer_done); - } else { - CHECK_AS_EXPECTED(false, HAILO_NOT_FOUND, "Couldnt find input buffer for '{}'", input_name); + { + std::unique_lock lock(m_mutex); + m_ongoing_parallel_transfers--; + } + m_cv.notify_all(); } - } - - for (const auto &output_name : m_output_names) { - auto buffer = bindings.output(output_name)->get_buffer(); - CHECK_EXPECTED(buffer, "Couldnt find output buffer for '{}'", output_name); - m_async_infer_runner->set_output(output_name, buffer.release(), transfer_done); - } + }; { std::unique_lock lock(m_mutex); - auto status = m_async_infer_runner->async_infer(); + auto status = m_async_infer_runner->run(bindings, transfer_done); CHECK_SUCCESS_AS_EXPECTED(status); m_ongoing_parallel_transfers++; } - m_cv.notify_all(); + AsyncInferJob job(job_pimpl); return job; } -Expected ConfiguredInferModelImpl::get_hw_latency_measurement(const std::string &network_name) +Expected ConfiguredInferModelImpl::get_hw_latency_measurement() { - return m_cng->get_latency_measurement(network_name); + return m_cng->get_latency_measurement(); } hailo_status ConfiguredInferModelImpl::set_scheduler_timeout(const std::chrono::milliseconds &timeout) @@ -683,7 +760,7 @@ hailo_status AsyncInferJob::Impl::wait(std::chrono::milliseconds timeout) bool was_successful = m_cv.wait_for(lock, timeout, [this] () -> bool { return (m_callback_called); }); - CHECK(was_successful, HAILO_TIMEOUT, "Waiting for async job to finish has failed with timeout {}!", timeout.count()); + CHECK(was_successful, HAILO_TIMEOUT, "Waiting for async job to finish has failed with timeout ({}ms)", timeout.count()); return HAILO_SUCCESS; } @@ -762,7 +839,7 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_buffer(Memor return HAILO_SUCCESS; } -Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_buffer() const { CHECK_AS_EXPECTED(BufferType::VIEW == m_buffer_type, HAILO_INVALID_OPERATION, "Trying to get buffer as view for '{}', while it is not configured as view", m_name); @@ -785,6 +862,22 @@ Expected ConfiguredInferModel::Bindings::InferStream::Impl:: return cp; } +hailo_status ConfiguredInferModel::Bindings::InferStream::Impl::set_dma_buffer(hailo_dma_buffer_t dma_buffer) +{ + m_buffer_type = BufferType::DMA_BUFFER; + m_dma_buffer = dma_buffer; + + return HAILO_SUCCESS; +} + +Expected ConfiguredInferModel::Bindings::InferStream::Impl::get_dma_buffer() +{ + CHECK_AS_EXPECTED(BufferType::DMA_BUFFER == m_buffer_type, HAILO_INVALID_OPERATION, + "Trying to get buffer as dma_buffer for '{}', while it is not configured as dma_buffer", m_name); + auto 
cp = m_dma_buffer; + return cp; +} + BufferType ConfiguredInferModel::Bindings::InferStream::Impl::get_type() { return m_buffer_type; @@ -809,6 +902,11 @@ hailo_status ConfiguredInferModel::Bindings::InferStream::set_pix_buffer(const h return m_pimpl->set_pix_buffer(pix_buffer); } +hailo_status ConfiguredInferModel::Bindings::InferStream::set_dma_buffer(hailo_dma_buffer_t dma_buffer) +{ + return m_pimpl->set_dma_buffer(dma_buffer); +} + Expected ConfiguredInferModel::Bindings::InferStream::get_buffer() { return m_pimpl->get_buffer(); @@ -819,4 +917,9 @@ Expected ConfiguredInferModel::Bindings::InferStream::get_pi return m_pimpl->get_pix_buffer(); } +Expected ConfiguredInferModel::Bindings::InferStream::get_dma_buffer() +{ + return m_pimpl->get_dma_buffer(); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp index bdea8116..7f40dead 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp @@ -23,9 +23,11 @@ class ConfiguredInferModel::Bindings::InferStream::Impl public: Impl(const hailo_vstream_info_t &vstream_info); hailo_status set_buffer(MemoryView view); - Expected get_buffer(); + Expected get_buffer() const; hailo_status set_pix_buffer(const hailo_pix_buffer_t &pix_buffer); Expected get_pix_buffer(); + hailo_status set_dma_buffer(hailo_dma_buffer_t dma_buffer); + Expected get_dma_buffer(); BufferType get_type(); void set_stream_callback(TransferDoneCallbackAsyncInfer callback); @@ -36,6 +38,7 @@ class ConfiguredInferModel::Bindings::InferStream::Impl union { MemoryView m_view; hailo_pix_buffer_t m_pix_buffer; + hailo_dma_buffer_t m_dma_buffer; }; TransferDoneCallbackAsyncInfer m_stream_callback; }; @@ -45,8 +48,10 @@ class InferModel::InferStream::Impl public: Impl(const hailo_vstream_info_t &vstream_info) : m_vstream_info(vstream_info), m_user_buffer_format(vstream_info.format), m_nms_score_threshold(static_cast(INVALID_NMS_CONFIG)), m_nms_iou_threshold(static_cast(INVALID_NMS_CONFIG)), - m_nms_max_proposals_per_class(static_cast(INVALID_NMS_CONFIG)) - {} + m_nms_max_proposals_per_class(static_cast(INVALID_NMS_CONFIG)), m_nms_max_accumulated_mask_size(static_cast(INVALID_NMS_CONFIG)) + { + m_user_buffer_format.flags = HAILO_FORMAT_FLAGS_NONE; // Init user's format flags to NONE for transposed models + } std::string name() const; hailo_3d_image_shape_t shape() const; @@ -61,6 +66,7 @@ class InferModel::InferStream::Impl void set_nms_score_threshold(float32_t threshold); void set_nms_iou_threshold(float32_t threshold); void set_nms_max_proposals_per_class(uint32_t max_proposals_per_class); + void set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size); private: friend class InferModel; @@ -71,6 +77,7 @@ class InferModel::InferStream::Impl float32_t m_nms_score_threshold; float32_t m_nms_iou_threshold; uint32_t m_nms_max_proposals_per_class; + uint32_t m_nms_max_accumulated_mask_size; }; class AsyncInferJob::Impl @@ -95,7 +102,8 @@ class ConfiguredInferModelImpl public: static Expected> create(std::shared_ptr net_group, const std::unordered_map &inputs_formats, const std::unordered_map &outputs_formats, - const std::vector &input_names, const std::vector &output_names, const uint32_t timeout = HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); + const std::vector &input_names, const std::vector &output_names, VDevice &vdevice, + const uint32_t timeout = 
HAILO_DEFAULT_VSTREAM_TIMEOUT_MS); ConfiguredInferModelImpl(std::shared_ptr cng, std::shared_ptr async_infer_runner, @@ -110,7 +118,7 @@ class ConfiguredInferModelImpl hailo_status run(ConfiguredInferModel::Bindings bindings, std::chrono::milliseconds timeout); Expected run_async(ConfiguredInferModel::Bindings bindings, std::function callback); - Expected get_hw_latency_measurement(const std::string &network_name); + Expected get_hw_latency_measurement(); hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout); hailo_status set_scheduler_threshold(uint32_t threshold); hailo_status set_scheduler_priority(uint8_t priority); diff --git a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp index 0cc4ad60..46d45829 100644 --- a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp @@ -218,7 +218,7 @@ hailo_status InferVStreams::infer(const std::map& input auto status = input_vstream.write(MemoryView::create_const( input_buffer.data() + offset, input_vstream.get_frame_size())); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__DEBUG("Input stream was aborted!"); return status; } @@ -249,7 +249,7 @@ hailo_status InferVStreams::infer(const std::map& input auto error_status = HAILO_SUCCESS; for (auto& result : results) { status = result->get(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { continue; } if (HAILO_SUCCESS != status) { @@ -389,4 +389,19 @@ hailo_status InferVStreams::set_nms_max_proposals_per_class(uint32_t max_proposa return HAILO_SUCCESS; } +hailo_status InferVStreams::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + auto has_mask_output = false; + for (auto &output_vstream : m_outputs) { + if (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == output_vstream.get_info().format.order) { + has_mask_output = true; + CHECK_SUCCESS(output_vstream.set_nms_max_accumulated_mask_size(max_accumulated_mask_size)); + } + } + CHECK(has_mask_output, HAILO_INVALID_OPERATION, + "'set_nms_max_accumulated_mask_size()' is called, but there is no NMS WITH BYTE MASK output in this model."); + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp new file mode 100644 index 00000000..54260eef --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp @@ -0,0 +1,993 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file multi_io_elements.cpp + * @brief Implementation of the elements with multiple inputs/outputs + **/ + +#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/multi_io_elements.hpp" + +namespace hailort +{ + +BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, hailo_status &status) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout) +{ + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_sinks.reserve(sink_count); + for (uint32_t i = 0; i < sink_count; ++i) { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sink_name_to_index[m_sinks[i].name()] = i; + } + m_barrier = make_shared_nothrow(sink_count); + if (nullptr == m_barrier) { + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + status = HAILO_SUCCESS; +} + +std::vector BaseMuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) { + if (PipelineDirection::PUSH == m_pipeline_direction) { + m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +hailo_status BaseMuxElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + { + // Ensuring nothing currently runs + std::unique_lock lock(m_mutex); + } + m_barrier->terminate(); + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + + +hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + assert(m_next_pads.size() == 1); + + m_barrier->arrive_and_wait(); + if (HAILO_SUCCESS == m_pipeline_status->load()) { + std::unique_lock lock(m_mutex); + m_input_buffers[sink.name()] = std::move(buffer); + if (m_input_buffers.size() == m_sink_name_to_index.size()) { // Last sink to set its buffer + + for (auto &input_buffer : m_input_buffers) { + if (HAILO_SUCCESS != input_buffer.second.action_status()) { + handle_non_recoverable_async_error(input_buffer.second.action_status()); + m_input_buffers.clear(); + m_barrier->terminate(); + return; + } + } + + std::vector input_buffers(m_input_buffers.size()); + for (auto &input_buffer : m_input_buffers) { + input_buffers[m_sink_name_to_index[input_buffer.first]] = std::move(input_buffer.second); + } + + auto output = action(std::move(input_buffers), PipelineBuffer()); + if (HAILO_SUCCESS == output.status()) { + m_next_pads[0]->run_push_async(output.release()); + } else { + m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); + } + + m_input_buffers.clear(); + } + } +} + +Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "BaseMuxElement {} does not support run_pull
operation", name()); + std::vector inputs; + inputs.reserve(m_sinks.size()); + for (auto &sink : m_sinks) { + auto buffer = sink.prev()->run_pull(); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + return make_unexpected(buffer.status()); + } + CHECK_EXPECTED(buffer); + + inputs.push_back(buffer.release()); + } + + auto output = action(std::move(inputs), std::move(optional)); + CHECK_EXPECTED(output); + + return output; +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + assert(nms_op->outputs_metadata().size() == 1); + auto vstream_info = nms_op->metadata()->get_output_vstream_info(); + CHECK_EXPECTED(vstream_info); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + hailo_status status = HAILO_UNINITIALIZED; + auto nms_elem_ptr = make_shared_nothrow(nms_op, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, status); + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", nms_elem_ptr->description()); + return nms_elem_ptr; +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsPostProcessMuxElement::create(nms_op, name, build_params.timeout, + build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, + const std::string &name, const hailo_vstream_params_t &vstream_params, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsPostProcessMuxElement::create(nms_op, name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.pipeline_elements_stats_flags, + pipeline_status, pipeline_direction, async_pipeline); +} + +NmsPostProcessMuxElement::NmsPostProcessMuxElement(std::shared_ptr nms_op, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status) : + BaseMuxElement(nms_op->inputs_metadata().size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), + pipeline_direction, async_pipeline, status), + m_nms_op(nms_op) +{} + +Expected NmsPostProcessMuxElement::action(std::vector &&input_buffers, PipelineBuffer &&optional) +{ + std::map inputs; + std::map outputs; + for (size_t i = 0; i < input_buffers.size(); ++i) { + inputs.insert({m_sinks_names[i], input_buffers[i].as_view()}); + } + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + for (auto &input : input_buffers) { + input.set_action_status(acquired_buffer.status()); + } + } + CHECK_EXPECTED(acquired_buffer); + outputs.insert({"", acquired_buffer->as_view()}); // TODO: 
fill with correct name + m_duration_collector.start_measurement(); + + auto post_process_result = m_nms_op->execute(inputs, outputs); + m_duration_collector.complete_measurement(); + + for (auto &input : input_buffers) { + input.set_action_status(post_process_result); + } + acquired_buffer->set_action_status(post_process_result); + + if (post_process_result != HAILO_INSUFFICIENT_BUFFER) { + // In YOLOv5-Seg there is an option for the user to change the frame size. + // Therefore we want to return an error status if the buffer is not big enough for all the detections found. + // We return the actual buffer and the error status, + // so the user will be able to choose whether to change the frame_size or ignore the rest of the detections. + CHECK_SUCCESS_AS_EXPECTED(post_process_result); + } + return acquired_buffer; +} + +std::string NmsPostProcessMuxElement::description() const +{ + std::stringstream element_description; + element_description << "(" << this->name() << " | " << m_nms_op->metadata()->get_op_description() << ")"; + return element_description.str(); +} + +static hailo_nms_info_t fuse_nms_info(const std::vector &nms_infos) +{ + hailo_nms_info_t fused_info = nms_infos[0]; + fused_info.is_defused = false; + fused_info.number_of_classes = 0; + for (const auto &nms_info : nms_infos) { + fused_info.number_of_classes += nms_info.number_of_classes; + assert(nms_infos[0].max_bboxes_per_class == nms_info.max_bboxes_per_class); + assert(nms_infos[0].bbox_size == nms_info.bbox_size); + assert(nms_infos[0].chunks_per_frame == nms_info.chunks_per_frame); + assert(nms_infos[0].burst_size == nms_info.burst_size); + assert(nms_infos[0].burst_type == nms_info.burst_type); + } + return fused_info; +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + const auto &fused_info = fuse_nms_info(nms_infos); + + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto status = HAILO_UNINITIALIZED; + auto nms_elem_ptr = make_shared_nothrow(nms_infos, fused_info, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, status); + CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + LOGGER__INFO("Created {}", nms_elem_ptr->description()); + + return nms_elem_ptr; +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, + const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return NmsMuxElement::create(nms_infos, name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.pipeline_elements_stats_flags, pipeline_status, pipeline_direction, + async_pipeline); +} + +Expected> NmsMuxElement::create(const std::vector &nms_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) +{ + return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +NmsMuxElement::NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, + const std::string &name, 
std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status) : + BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, + async_pipeline, status), + m_nms_infos(nms_infos), + m_fused_nms_info(fused_nms_info) +{} + +const hailo_nms_info_t &NmsMuxElement::get_fused_nms_info() const +{ + return m_fused_nms_info; +} + +Expected NmsMuxElement::action(std::vector &&inputs, PipelineBuffer &&optional) +{ + std::vector input_views; + + input_views.reserve(inputs.size()); + for (auto &input_buf : inputs) { + input_views.push_back(input_buf.as_view()); + } + auto pool = next_pad_downstream().element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->get_available_buffer(std::move(optional), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + for (auto &input : inputs) { + input.set_action_status(acquired_buffer.status()); + } + } + CHECK_AS_EXPECTED(HAILO_TIMEOUT != acquired_buffer.status(), HAILO_TIMEOUT, + "{} failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(acquired_buffer); + + m_duration_collector.start_measurement(); + const auto status = fuse_buffers(input_views, m_nms_infos, acquired_buffer.value().as_view()); + m_duration_collector.complete_measurement(); + + for (auto &input : inputs) { + input.set_action_status(status); + } + acquired_buffer->set_action_status(status); + + CHECK_SUCCESS_AS_EXPECTED(status); + + return acquired_buffer.release(); +} + +BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_is_activated(false), + m_was_stream_aborted(false), + m_source_name_to_index(), + m_was_source_called(source_count, false), + m_buffers_for_action() +{ + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + m_sources.reserve(source_count); + for (uint32_t i = 0; i < source_count; i++) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + m_source_name_to_index[m_sources[i].name()] = i; + } +} + +hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_push operation", name()); + + auto outputs = action(std::move(buffer)); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + return outputs.status(); + } + CHECK_EXPECTED_AS_STATUS(outputs); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto status = pad->run_push(std::move(outputs.value()[source_index])); + + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("run_push of {} was shutdown!", name()); + return status; + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + 
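+        // Shutdown and abort are expected teardown statuses and are returned as-is above;
+        // any other failure from the downstream pad is treated as a real pipeline error.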
CHECK_SUCCESS(status); + } + + return HAILO_SUCCESS; +} + +void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + assert(PipelineDirection::PUSH == m_pipeline_direction); + if (HAILO_SUCCESS != buffer.action_status()) { + for (const auto &pad : execution_pads()) { + auto source_index = m_source_name_to_index[pad->prev()->name()]; + auto pool = m_sources[source_index].next()->element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS == acquired_buffer.status()) { + acquired_buffer->set_action_status(buffer.action_status()); + pad->run_push_async(acquired_buffer.release()); + } else { + handle_non_recoverable_async_error(acquired_buffer.status()); + } + } + return; + } + + auto outputs = action(std::move(buffer)); + + for (const auto &pad : execution_pads()) { + assert(m_source_name_to_index.count(pad->prev()->name()) > 0); + auto source_index = m_source_name_to_index[pad->prev()->name()]; + if (HAILO_SUCCESS == outputs.status()) { + pad->run_push_async(std::move(outputs.value()[source_index])); + } else { + pad->run_push_async(PipelineBuffer(outputs.status())); + } + } +} + +Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +{ + CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, + "BaseDemuxElement {} does not support run_pull operation", name()); + + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); + + std::unique_lock lock(m_mutex); + if (!m_is_activated) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + + if (m_was_stream_aborted) { + return make_unexpected(HAILO_STREAM_ABORT); + } + + m_was_source_called[m_source_name_to_index[source.name()]] = true; + + if (were_all_srcs_arrived()) { + // If all srcs arrived, execute the demux + auto input = execution_pads()[0]->run_pull(); + if (HAILO_STREAM_ABORT == input.status()) { + LOGGER__INFO("run_pull of demux element was aborted!"); + m_was_stream_aborted = true; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(input.status()); + } + if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { + LOGGER__INFO("run_pull of demux element was aborted in {} because pipeline deactivated!", name()); + m_is_activated = false; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(input.status()); + } + CHECK_EXPECTED(input); + + auto outputs = action(input.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { + LOGGER__INFO("run_pull of demux element was aborted in {} because pipeline deactivated!", name()); + m_is_activated = false; + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(outputs.status()); + } + CHECK_EXPECTED(outputs); + + m_buffers_for_action = outputs.release(); + + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + + // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again + lock.unlock(); + m_cv.notify_all(); + } else { + // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) + auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ + return !m_was_source_called[m_source_name_to_index[source.name()]] || m_was_stream_aborted || !m_is_activated; + }); + CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", 
name(), m_timeout.count()); + + if (m_was_stream_aborted) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_STREAM_ABORT); + } + + // We check if the element is not activated in case notify_all() was called from deactivate() + if (!m_is_activated) { + lock.unlock(); + m_cv.notify_all(); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + } + + assert(m_source_name_to_index[source.name()] < m_buffers_for_action.size()); + return std::move(m_buffers_for_action[m_source_name_to_index[source.name()]]); +} + +bool BaseDemuxElement::were_all_srcs_arrived() +{ + return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); +} + +hailo_status BaseDemuxElement::execute_activate() +{ + if (m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? + m_was_stream_aborted = false; + + return PipelineElementInternal::execute_activate(); +} + +hailo_status BaseDemuxElement::execute_deactivate() +{ + if (!m_is_activated) { + return HAILO_SUCCESS; + } + m_is_activated = false; + + // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on + // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error + hailo_status status = PipelineElementInternal::execute_deactivate(); + + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. + std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) +{ + for (uint32_t i = 0; i < m_was_source_called.size(); i++) { + m_was_source_called[i] = false; + } + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status BaseDemuxElement::execute_abort() +{ + auto status = PipelineElementInternal::execute_abort(); + CHECK_SUCCESS(status); + { + // There is a case where the other thread is halted (via context switch) before the wait_for() function, + // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. + // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
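A note on the empty lock scope that follows this comment (and the identical one in execute_deactivate() above): taking and immediately releasing m_mutex before notify_all() closes the window in which a waiter has evaluated its wait_for() predicate but has not yet blocked, which is how a notification can otherwise be lost. A generic sketch of the pattern (textbook form with the flag set under the lock; not HailoRT code):

```cpp
#include <condition_variable>
#include <mutex>
#include <thread>

std::mutex m;
std::condition_variable cv;
bool stop = false;

void waiter()
{
    std::unique_lock<std::mutex> lock(m);
    // The predicate is re-checked under the lock, so a notify that happens
    // while the waiter holds the lock cannot be lost.
    cv.wait(lock, [] { return stop; });
}

void stopper()
{
    {
        // Writing the flag under the same mutex orders it with the waiter's
        // predicate check - the same role the empty lock scope plays above.
        std::lock_guard<std::mutex> lock(m);
        stop = true;
    }
    cv.notify_all();
}

int main()
{
    std::thread t(waiter);
    stopper();
    t.join();
}
```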
+ std::unique_lock lock(m_mutex); + } + m_cv.notify_all(); + + return HAILO_SUCCESS; +} + +hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) +{ + m_timeout = timeout; + return HAILO_SUCCESS; +} + +Expected BaseDemuxElement::get_source_index_from_source_name(const std::string &source_name) +{ + CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND); + auto ret_val = m_source_name_to_index.at(source_name); + return ret_val; +} + +std::vector BaseDemuxElement::execution_pads() +{ + if (m_next_pads.size() == 0) + { + if (PipelineDirection::PUSH == m_pipeline_direction) { + m_next_pads.reserve(m_sources.size()); + for (auto &source : m_sources ) { + m_next_pads.push_back(source.next()); + } + } else { + m_next_pads.reserve(m_sinks.size()); + for (auto &sink : m_sinks ) { + m_next_pads.push_back(sink.prev()); + } + } + } + return m_next_pads; +} + +Expected> TransformDemuxElement::create(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + + auto demux_elem_ptr = make_shared_nothrow(demuxer, name, timeout, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); + CHECK_AS_EXPECTED(nullptr != demux_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return demux_elem_ptr; +} + +Expected> TransformDemuxElement::create(std::shared_ptr demuxer, + const std::string &name, const ElementBuildParams &build_params, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.elem_stats_flags, + build_params.pipeline_status, pipeline_direction, async_pipeline); +} + +TransformDemuxElement::TransformDemuxElement(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : + BaseDemuxElement(demuxer->get_edges_stream_info().size(), name, timeout, std::move(duration_collector), + std::move(pipeline_status), pipeline_direction, async_pipeline), + m_demuxer(demuxer) +{} + +Expected> TransformDemuxElement::action(PipelineBuffer &&input) +{ + std::vector outputs; + std::vector raw_buffers; + + auto mux_edges = m_demuxer->get_edges_stream_info(); + outputs.reserve(mux_edges.size()); + raw_buffers.reserve(mux_edges.size()); + + for (uint32_t i = 0; i < mux_edges.size(); i++) { + + auto pool = m_sources[i].next()->element().get_buffer_pool(); + assert(pool); + + auto acquired_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { + return make_unexpected(acquired_buffer.status()); + } + + if (!acquired_buffer) { + input.set_action_status(acquired_buffer.status()); + } + CHECK_EXPECTED(acquired_buffer, "Failed to acquire buffer"); + outputs.emplace_back(acquired_buffer.release()); + raw_buffers.push_back(outputs.back().as_view()); + } + + m_duration_collector.start_measurement(); + const auto status = m_demuxer->transform_demux(input.as_view(), raw_buffers); + m_duration_collector.complete_measurement(); + + input.set_action_status(status); + for (auto &output : outputs) { + output.set_action_status(status); + 
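The set_action_status() calls here implement the pipeline's error-propagation contract: a buffer remembers the status of the action that produced it, and the completion callback attached to the buffer eventually observes that status. A toy illustration of the idea (hypothetical MiniBuffer type, not the real PipelineBuffer):

```cpp
#include <functional>
#include <iostream>

// A miniature status-carrying buffer: whoever produced it records the action
// status, and the completion callback receives it on release.
struct MiniBuffer {
    int action_status = 0; // 0 plays the role of HAILO_SUCCESS
    std::function<void(int)> exec_done;

    ~MiniBuffer()
    {
        if (exec_done) {
            exec_done(action_status); // fired exactly once, on release
        }
    }
};

int main()
{
    MiniBuffer buf;
    buf.exec_done = [](int status) { std::cout << "exec_done, status=" << status << "\n"; };
    buf.action_status = -1; // e.g. a failed transform_demux
} // leaving scope releases the buffer and reports the recorded status
```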
} + + CHECK_SUCCESS_AS_EXPECTED(status); + + return outputs; +} + +PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + hailo_format_order_t order, std::shared_ptr async_pipeline) : + BaseDemuxElement(((order == HAILO_FORMAT_ORDER_I420) ? NUMBER_OF_PLANES_I420 : NUMBER_OF_PLANES_NV12_NV21), + name, timeout, std::move(duration_collector), std::move(pipeline_status), + PipelineDirection::PUSH, async_pipeline), + m_order(order) +{} + +Expected> PixBufferElement::create(const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline) +{ + auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, + std::move(duration_collector), std::move(pipeline_status), order, async_pipeline); + CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + return pix_buffer_splitter_elem_ptr; +} + +Expected> PixBufferElement::action(PipelineBuffer &&input) +{ + // splits the planes into buffers + m_duration_collector.start_measurement(); + std::vector outputs; + + auto input_pix_buffer_expected = input.as_hailo_pix_buffer(m_order); + + if (!input_pix_buffer_expected) { + input.set_action_status(input_pix_buffer_expected.status()); + } + CHECK_EXPECTED(input_pix_buffer_expected); + auto input_pix_buffer = input_pix_buffer_expected.release(); + + if (PipelineBuffer::Type::FLUSH == input.get_type()) { + for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { + outputs.emplace_back(PipelineBuffer(PipelineBuffer::Type::FLUSH)); + } + } else { + auto shared_input_buff = make_shared_nothrow(std::move(input)); + if (!shared_input_buff) { + handle_non_recoverable_async_error(HAILO_OUT_OF_HOST_MEMORY); + } + CHECK_NOT_NULL_AS_EXPECTED(shared_input_buff, HAILO_OUT_OF_HOST_MEMORY); + + for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { + outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used), + [input_ptr = shared_input_buff](hailo_status status) + { + if (HAILO_SUCCESS != status) { + input_ptr->set_action_status(status); + } + }); + } + } + + m_duration_collector.complete_measurement(); + return outputs; +} + +Expected> AsyncHwElement::create(const std::unordered_map &named_stream_infos, + std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, const std::string &name, + std::shared_ptr> pipeline_status, std::shared_ptr net_group, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) +{ + auto duration_collector = DurationCollector::create(elem_flags); + CHECK_EXPECTED(duration_collector); + + auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); + CHECK_EXPECTED(min_buffer_pool_size); + + auto status = HAILO_UNINITIALIZED; + auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, name, + duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, + min_buffer_pool_size.release(), status); + CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + CHECK_SUCCESS_AS_EXPECTED(status); + + LOGGER__INFO("Created {}", elem_ptr->description()); + + return elem_ptr; +} + +AsyncHwElement::AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + const std::string &name, DurationCollector 
&&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers, hailo_status &status) : + PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), + m_timeout(timeout), + m_net_group(net_group), + m_max_ongoing_transfers(max_ongoing_transfers) +{ + uint32_t sinks_count = 0; + uint32_t sources_count = 0; + for (const auto &stream_info_pair : named_stream_infos) { + if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { + m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); + const auto &source_name = m_sources[sources_count++].name(); + m_source_name_to_stream_name[source_name] = stream_info_pair.first; + + m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); + } else { + m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); + const auto &sink_name = m_sinks[sinks_count++].name(); + m_sink_name_to_stream_name[sink_name] = stream_info_pair.first; + m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); + } + } + m_barrier = make_shared_nothrow(sinks_count); + if (nullptr == m_barrier) { + status = HAILO_OUT_OF_HOST_MEMORY; + return; + } + status = HAILO_SUCCESS; +} + +// This func overrides the regular dataflow of this element and calls all next elements run_push_async directly +// (normally, the run_push_async of the next elements will be called by the LL async read_done) +void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status) +{ + for (auto &name_output_stream_pair : m_source_name_to_index) { + auto source_index = name_output_stream_pair.second; + assert(source_index < m_sources.size()); + + auto pool = m_sources[source_index].next()->element().get_buffer_pool(); + assert(pool); + + auto expected_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS == expected_buffer.status()) { + expected_buffer->set_action_status(error_status); + m_sources[source_index].next()->run_push_async(expected_buffer.release()); + } else { + m_sources[source_index].next()->run_push_async(PipelineBuffer(error_status)); + } + } + + return; +} + +void AsyncHwElement::action() +{ + // Assuming m_input_buffers is full (has a valid buffer for all sinks) + for (auto &input_buffer : m_input_buffers) { + if (HAILO_SUCCESS != input_buffer.second.action_status()) { + handle_error_in_hw_async_elem(input_buffer.second.action_status()); + m_input_buffers.clear(); + return; + } + } + + // TODO: HRT-13324 Change to be map of + std::unordered_map> source_name_to_output_buffer; + for (auto &name_to_index_pair : m_source_name_to_index) { + auto pool = m_sources[name_to_index_pair.second].next()->element().get_buffer_pool(); + assert(pool); + + auto expected_buffer = pool->acquire_buffer(m_timeout); + if (HAILO_SUCCESS != expected_buffer.status()) { + handle_non_recoverable_async_error(expected_buffer.status()); + m_input_buffers.clear(); + m_barrier->terminate(); + return; + } + source_name_to_output_buffer[name_to_index_pair.first] = make_shared_nothrow(expected_buffer.release()); + } + + NamedBuffersCallbacks named_buffers_callbacks; + + for (auto &input_buffer : m_input_buffers) { + const auto &stream_name = m_sink_name_to_stream_name.at(input_buffer.first); + // std::function requires its lambda to be copyable, so using shared_ptr + auto buffer_shared = make_shared_nothrow(std::move(input_buffer.second)); + if (nullptr == buffer_shared) { + 
handle_non_recoverable_async_error(HAILO_OUT_OF_HOST_MEMORY);
+            m_input_buffers.clear();
+            m_barrier->terminate();
+            return;
+        }
+        named_buffers_callbacks.emplace(stream_name, std::make_pair(buffer_shared->as_view(),
+            [buffer_shared](hailo_status status) { buffer_shared->set_action_status(status); }));
+    }
+
+    for (auto &output_buffer : source_name_to_output_buffer) {
+        const auto &stream_name = m_source_name_to_stream_name.at(output_buffer.first);
+        named_buffers_callbacks.emplace(stream_name, std::make_pair(output_buffer.second->as_view(),
+            [this, buffer = output_buffer.second, source_name = output_buffer.first](hailo_status status){
+                buffer->set_action_status(status);
+                // If pipeline_status is not success, someone already handled the error and there is no reason to push this buffer
+                if (HAILO_SUCCESS == m_pipeline_status->load()) {
+                    assert(contains(m_source_name_to_index, source_name));
+                    m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer));
+                }
+            }));
+    }
+
+    auto status = m_net_group->wait_for_ongoing_callbacks_count_under(m_max_ongoing_transfers);
+    if (HAILO_SUCCESS != status) {
+        handle_non_recoverable_async_error(status);
+        m_input_buffers.clear();
+        m_barrier->terminate();
+        return;
+    }
+
+    status = m_net_group->infer_async(named_buffers_callbacks, [](hailo_status){});
+    if (HAILO_SUCCESS != status) {
+        handle_non_recoverable_async_error(status);
+        m_input_buffers.clear();
+        m_barrier->terminate();
+        return;
+    }
+
+    m_input_buffers.clear();
+}
+
+void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink)
+{
+    assert(contains(m_sink_name_to_index, sink.name()));
+
+    m_barrier->arrive_and_wait();
+    if (HAILO_SUCCESS == m_pipeline_status->load()) {
+        std::unique_lock<std::mutex> lock(m_mutex);
+        m_input_buffers[sink.name()] = std::move(buffer);
+        if (m_input_buffers.size() == m_sink_name_to_index.size()) { // Last sink to set its buffer
+            action();
+        }
+    }
+}
+
+hailo_status AsyncHwElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+Expected<uint32_t> AsyncHwElement::get_source_index_from_output_stream_name(const std::string &output_stream_name)
+{
+    for (const auto &name_pair : m_source_name_to_stream_name) {
+        if (name_pair.second == output_stream_name) {
+            assert(contains(m_source_name_to_index, name_pair.first));
+            uint32_t ret_val = m_source_name_to_index.at(name_pair.first);
+            return ret_val;
+        }
+    }
+    return make_unexpected(HAILO_NOT_FOUND);
+}
+
+Expected<uint32_t> AsyncHwElement::get_source_index_from_source_name(const std::string &source_name)
+{
+    CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND, "couldn't find source '{}'", source_name);
+    auto ret_val = m_source_name_to_index.at(source_name);
+    return ret_val;
+}
+
+Expected<uint32_t> AsyncHwElement::get_sink_index_from_input_stream_name(const std::string &input_stream_name)
+{
+    for (const auto &name_pair : m_sink_name_to_stream_name) {
+        if (name_pair.second == input_stream_name) {
+            return Expected<uint32_t>(m_sink_name_to_index.at(name_pair.first));
+        }
+    }
+    return make_unexpected(HAILO_INVALID_ARGUMENT);
+}
+
+Expected<PipelineBuffer> AsyncHwElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+{
+    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+}
+
+std::vector<PipelinePad*> AsyncHwElement::execution_pads()
+{
+    std::vector<PipelinePad*> result;
+    result.reserve(m_sources.size());
+    for (auto &pad : m_sources) {
+        result.push_back(pad.next());
+    }
+    return result;
+}
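AsyncHwElement's input side works as a rendezvous: every sink thread arrives at m_barrier in run_push_async(), and only the last sink to deliver its buffer calls action(). A minimal sketch of the same rendezvous using C++20 std::barrier in place of HailoRT's internal Barrier class:

```cpp
#include <barrier>
#include <iostream>
#include <mutex>
#include <thread>
#include <vector>

int main()
{
    constexpr int sinks = 3;
    std::mutex print_mutex;

    // The completion function runs in exactly one thread once all sinks have
    // arrived - analogous to "the last sink to set its buffer calls action()".
    std::barrier sync(sinks, []() noexcept {
        std::cout << "all inputs present -> launch inference\n";
    });

    std::vector<std::jthread> threads;
    for (int i = 0; i < sinks; i++) {
        threads.emplace_back([&, i] {
            {
                std::lock_guard<std::mutex> lock(print_mutex);
                std::cout << "sink " << i << " delivered its buffer\n";
            }
            sync.arrive_and_wait();
        });
    }
} // jthreads join on destruction
```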
+
+hailo_status AsyncHwElement::execute_terminate(hailo_status error_status)
+{
+    if (m_is_terminated) {
+        return HAILO_SUCCESS;
+    }
+
+    m_barrier->terminate();
+
+    // Checking success of shutdown is best effort (terminate should be called even if shutdown fails)
+    auto shutdown_status = m_net_group->shutdown();
+    auto terminate_status = PipelineElement::execute_terminate(error_status);
+    CHECK_SUCCESS(shutdown_status);
+    CHECK_SUCCESS(terminate_status);
+
+    return HAILO_SUCCESS;
+}
+
+std::vector<BufferPoolPtr> AsyncHwElement::get_hw_interacted_buffer_pools_h2d()
+{
+    std::vector<BufferPoolPtr> res;
+    for (auto &sink : m_sinks) {
+        res.push_back(sink.prev()->element().get_buffer_pool());
+    }
+    return res;
+}
+
+std::vector<BufferPoolPtr> AsyncHwElement::get_hw_interacted_buffer_pools_d2h()
+{
+    std::vector<BufferPoolPtr> res;
+    for (auto &source : m_sources) {
+        res.push_back(source.next()->element().get_buffer_pool());
+    }
+    return res;
+}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp
new file mode 100644
index 00000000..b7468f1a
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp
@@ -0,0 +1,279 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file multi_io_elements.hpp
+ * @brief All multiple-input/multiple-output elements in the pipeline.
+ **/
+
+#ifndef _HAILO_MULTI_IO_ELEMENTS_HPP_
+#define _HAILO_MULTI_IO_ELEMENTS_HPP_
+
+#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp"
+
+namespace hailort
+{
+
+class BaseMuxElement : public PipelineElementInternal
+{
+public:
+    virtual ~BaseMuxElement() = default;
+
+    virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
+
+protected:
+    BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout,
+        DurationCollector &&duration_collector, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+        PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline,
+        hailo_status &status);
+    virtual hailo_status execute_terminate(hailo_status error_status) override;
+    virtual Expected<PipelineBuffer> action(std::vector<PipelineBuffer> &&inputs, PipelineBuffer &&optional) = 0;
+    virtual std::vector<PipelinePad*> execution_pads() override;
+
+    PipelinePad &next_pad_downstream()
+    {
+        return *m_sources[0].next();
+    }
+
+    std::chrono::milliseconds m_timeout;
+
+private:
+    std::mutex m_mutex;
+    std::unordered_map<std::string, uint32_t> m_sink_name_to_index;
+    std::unordered_map<std::string, PipelineBuffer> m_input_buffers;
+    std::vector<PipelinePad*> m_next_pads;
+    BarrierPtr m_barrier;
+};
+
+class NmsPostProcessMuxElement : public BaseMuxElement
+{
+public:
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string &name, std::chrono::milliseconds timeout,
+        hailo_pipeline_elem_stats_flags_t elem_flags,
+        std::shared_ptr<std::atomic<hailo_status>> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<NmsPostProcessMuxElement>> create(std::shared_ptr<net_flow::Op> nms_op,
+        const std::string
&name, const hailo_vstream_params_t &vstream_params, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + NmsPostProcessMuxElement(std::shared_ptr nms_op, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, hailo_status &status); + virtual std::string description() const override; + + void add_sink_name(const std::string &name) // TODO: remove this (HRT-8875) + { + m_sinks_names.push_back(name); + } + + std::shared_ptr get_op() { return m_nms_op; } + + virtual hailo_status set_nms_score_threshold(float32_t threshold) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_score_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_iou_threshold(float32_t threshold) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().nms_iou_th = threshold; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) + { + auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != nms_metadata); + nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; + + return HAILO_SUCCESS; + } + + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) + { + auto yolov5seg_metadata = std::dynamic_pointer_cast(get_op()->metadata()); + assert(nullptr != yolov5seg_metadata); + yolov5seg_metadata->yolov5seg_config().max_accumulated_mask_size = max_accumulated_mask_size; + + return HAILO_SUCCESS; + } + +protected: + virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; + +private: + std::shared_ptr m_nms_op; + std::vector m_sinks_names; // TODO: remove this (HRT-8875) +}; + +class NmsMuxElement : public BaseMuxElement +{ +public: + static Expected> create(const std::vector &nms_infos, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); + static Expected> create(const std::vector &nms_infos, const std::string &name, + const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, + PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(const std::vector &nms_infos, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, hailo_status &status); + const hailo_nms_info_t &get_fused_nms_info() const; + +protected: + virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; + +private: + std::vector m_nms_infos; + hailo_nms_info_t m_fused_nms_info; +}; + +class BaseDemuxElement : public 
PipelineElementInternal +{ +public: + virtual ~BaseDemuxElement() = default; + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + hailo_status set_timeout(std::chrono::milliseconds timeout); + + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; + +protected: + BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + virtual hailo_status execute_activate() override; + virtual hailo_status execute_deactivate() override; + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_abort() override; + virtual Expected> action(PipelineBuffer &&input) = 0; + virtual std::vector execution_pads() override; + + std::chrono::milliseconds m_timeout; + +private: + bool were_all_srcs_arrived(); + + std::atomic_bool m_is_activated; + std::atomic_bool m_was_stream_aborted; + std::unordered_map m_source_name_to_index; + std::vector m_was_source_called; + std::vector m_buffers_for_action; + std::mutex m_mutex; + std::condition_variable m_cv; + std::vector m_next_pads; +}; + +class TransformDemuxElement : public BaseDemuxElement +{ +public: + static Expected> create(std::shared_ptr demuxer, + const std::string &name, std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, + std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + static Expected> create(std::shared_ptr demuxer, + const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::shared_ptr async_pipeline = nullptr); + TransformDemuxElement(std::shared_ptr demuxer, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + +protected: + virtual Expected> action(PipelineBuffer &&input) override; + +private: + std::shared_ptr m_demuxer; +}; + +class PixBufferElement : public BaseDemuxElement +{ +public: + static Expected> create(const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline = nullptr); + + PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, hailo_format_order_t order, + std::shared_ptr async_pipeline); + +protected: + virtual Expected> action(PipelineBuffer &&input); + hailo_format_order_t m_order; +}; + +// Note: This element does infer - it sends writes to HW and reads the outputs +class AsyncHwElement : public PipelineElementInternal +{ +public: + static Expected> create(const std::unordered_map &named_stream_infos, + std::chrono::milliseconds timeout, hailo_pipeline_elem_stats_flags_t elem_flags, const std::string &name, + std::shared_ptr> pipeline_status, + std::shared_ptr net_group, PipelineDirection pipeline_direction = PipelineDirection::PUSH, + 
std::shared_ptr async_pipeline = nullptr); + AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, + const std::string &name, DurationCollector &&duration_collector, + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline, std::shared_ptr net_group, + const size_t max_ongoing_transfers, hailo_status &status); + virtual ~AsyncHwElement() = default; + + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + void action(); + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + + Expected get_source_index_from_output_stream_name(const std::string &output_stream_name); + Expected get_sink_index_from_input_stream_name(const std::string &input_stream_name); + virtual Expected get_source_index_from_source_name(const std::string &source_name) override; + + std::vector get_hw_interacted_buffer_pools_h2d(); + std::vector get_hw_interacted_buffer_pools_d2h(); + +protected: + virtual std::vector execution_pads() override; + virtual hailo_status execute_terminate(hailo_status error_status) override; + +private: + void handle_error_in_hw_async_elem(hailo_status error_status); + + std::chrono::milliseconds m_timeout; + std::shared_ptr m_net_group; + size_t m_max_ongoing_transfers; + + std::unordered_map m_sink_name_to_stream_name; + std::unordered_map m_source_name_to_stream_name; + std::unordered_map m_input_buffers; + std::mutex m_mutex; + std::unordered_map m_source_name_to_index; + std::unordered_map m_sink_name_to_index; + BarrierPtr m_barrier; +}; + + + +} /* namespace hailort */ + +#endif /* _HAILO_MULTI_IO_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp index 6360ed86..0dff98fe 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp @@ -13,7 +13,11 @@ #include "hailo/expected.hpp" #include "hailo/hailort.h" +#include "hailo/hailort_common.hpp" +#include "hailo/vdevice.hpp" #include "net_flow/pipeline/pipeline.hpp" +#include "utils/buffer_storage.hpp" + #include namespace hailort @@ -37,10 +41,6 @@ void PipelineBuffer::Metadata::set_start_time(PipelineTimePoint val) m_start_time = val; } -PipelineBuffer::PipelineBuffer() : - PipelineBuffer(Type::DATA) -{} - PipelineBuffer::PipelineBuffer(Type type) : m_type(type), m_pool(nullptr), @@ -70,22 +70,8 @@ PipelineBuffer::PipelineBuffer(hailo_status action_status, const TransferDoneCal }; } -PipelineBuffer::PipelineBuffer(MemoryView view, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, hailo_status action_status) : - m_type(Type::DATA), - m_pool(pool), - m_view(view), - m_metadata(Metadata(add_timestamp(should_measure))), - m_is_user_buffer(is_user_buffer), - m_should_call_exec_done(true), - m_action_status(action_status) -{ - m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ - release_buffer(buffer_pool, mem_view, is_user_buffer); - }; -} - -PipelineBuffer::PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, bool is_user_buffer, BufferPoolPtr pool, bool should_measure, - hailo_status action_status) : +PipelineBuffer::PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, hailo_status action_status, bool 
is_user_buffer, BufferPoolPtr pool, + bool should_measure) : m_type(Type::DATA), m_pool(pool), m_view(view), @@ -116,20 +102,6 @@ PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCall }; } -PipelineBuffer::PipelineBuffer(hailo_pix_buffer_t buffer) : - m_type(Type::DATA), - m_pool(nullptr), - m_view(), - m_metadata(), - m_is_user_buffer(false), - m_should_call_exec_done(true) -{ - set_additional_data(std::make_shared(buffer)); - m_exec_done = [buffer_pool = m_pool, mem_view = m_view, is_user_buffer = m_is_user_buffer](hailo_status){ - release_buffer(buffer_pool, mem_view, is_user_buffer); - }; -} - PipelineBuffer::PipelineBuffer(PipelineBuffer &&other) : m_type(other.m_type), m_pool(std::move(other.m_pool)), @@ -196,43 +168,8 @@ Expected PipelineBuffer::as_hailo_pix_buffer(hailo_format_or auto pix_buffer = get_metadata().get_additional_data(); if (nullptr == pix_buffer) { - switch(order){ - case HAILO_FORMAT_ORDER_NV12: - case HAILO_FORMAT_ORDER_NV21: { - CHECK_AS_EXPECTED(0 == (m_view.size() % 3), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 3"); - - auto y_plane_size = m_view.size() * 2 / 3; - auto uv_plane_size = m_view.size() * 1 / 3; - - auto uv_data_ptr = reinterpret_cast(m_view.data()) + y_plane_size; - - hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), m_view.data()}; - hailo_pix_buffer_plane_t uv {uint32_t(uv_plane_size), uint32_t(uv_plane_size), uv_data_ptr}; - hailo_pix_buffer_t buffer{0, {y, uv}, NUMBER_OF_PLANES_NV12_NV21}; - - return buffer; - } - case HAILO_FORMAT_ORDER_I420: { - CHECK_AS_EXPECTED(0 == (m_view.size() % 6), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 6"); - - auto y_plane_size = m_view.size() * 2 / 3; - auto u_plane_size = m_view.size() * 1 / 6; - auto v_plane_size = m_view.size() * 1 / 6; - - auto u_data_ptr = (char*)m_view.data() + y_plane_size; - auto v_data_ptr = u_data_ptr + u_plane_size; - - hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), m_view.data()}; - hailo_pix_buffer_plane_t u {uint32_t(u_plane_size), uint32_t(u_plane_size), u_data_ptr}; - hailo_pix_buffer_plane_t v {uint32_t(v_plane_size), uint32_t(v_plane_size), v_data_ptr}; - hailo_pix_buffer_t buffer{0, {y, u, v}, NUMBER_OF_PLANES_I420}; - - return buffer; - } - default: { - CHECK_AS_EXPECTED(false, HAILO_INTERNAL_FAILURE, "unsupported format order"); - } - } + auto mem_view = as_view(); + return HailoRTCommon::as_hailo_pix_buffer(mem_view, order); } else { uint32_t expected_number_of_planes; switch(order){ @@ -262,12 +199,6 @@ void PipelineBuffer::set_metadata(Metadata &&val) m_metadata = std::move(val); } -TransferDoneCallbackAsyncInfer PipelineBuffer::get_exec_done_cb() -{ - m_should_call_exec_done = false; - return m_exec_done; -} - PipelineTimePoint PipelineBuffer::add_timestamp(bool should_measure) { return should_measure ? 
std::chrono::steady_clock::now() : PipelineTimePoint{}; @@ -293,6 +224,14 @@ void PipelineBuffer::set_action_status(hailo_status status) m_action_status = status; } +void PipelineBuffer::call_exec_done() +{ + if (m_should_call_exec_done) { + m_exec_done(action_status()); + m_should_call_exec_done = false; + } +} + Expected BufferPool::create(size_t buffer_size, size_t buffer_count, EventPtr shutdown_event, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, bool is_empty, bool is_dma_able) @@ -345,31 +284,33 @@ BufferPool::BufferPool(size_t buffer_size, bool is_holding_user_buffers, bool me m_buffers(std::move(buffers)), m_free_mem_views(std::move(free_mem_views)), m_done_cbs(std::move(done_cbs)), - m_queue_size_accumulator(std::move(queue_size_accumulator)) + m_queue_size_accumulator(std::move(queue_size_accumulator)), + m_is_already_running(false) { } size_t BufferPool::buffer_size() { - return m_buffer_size; + std::unique_lock lock(m_buffer_size_mutex); + return m_buffer_size.load(); } -hailo_status BufferPool::enqueue_buffer(MemoryView mem_view) +hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) { - CHECK(mem_view.size() == m_buffer_size, HAILO_INTERNAL_FAILURE, "Buffer size is not the same as expected for pool! ({} != {})", mem_view.size(), m_buffer_size); + m_is_already_running = true; + auto pool_buffer_size = buffer_size(); + CHECK(mem_view.size() == pool_buffer_size, HAILO_INTERNAL_FAILURE, + "Buffer size is not the same as expected for pool! ({} != {})", mem_view.size(), pool_buffer_size); + std::unique_lock lock(m_enqueue_mutex); auto status = m_free_mem_views.enqueue(mem_view); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } CHECK_SUCCESS(status); - return HAILO_SUCCESS; -} - -hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) -{ - auto status = enqueue_buffer(mem_view); - CHECK_SUCCESS(status); - - status = m_done_cbs.enqueue(exec_done); + // TODO: Stop using 2 queues, hold a queue of pipeline_buffer instead. 
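To make the TODO above concrete: m_free_mem_views and m_done_cbs are parallel queues that must stay in lockstep, which is why the callback is only enqueued once the view enqueue has succeeded. A toy model of that pairing under the same ordering rule (plain std::queue stand-ins, illustrative names only):

```cpp
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <utility>

using Callback = std::function<void(int)>;

// Toy stand-in for the pool's paired queues; not the real HailoRT types.
struct ToyPool {
    std::queue<std::string> free_views; // plays the role of m_free_mem_views
    std::queue<Callback>    done_cbs;   // plays the role of m_done_cbs
    const size_t capacity = 4;

    // The callback is pushed only once the view is committed, so the two
    // queues can never drift apart - the same ordering argument the comment
    // in enqueue_buffer() makes.
    bool enqueue(std::string view, Callback cb)
    {
        if (free_views.size() >= capacity) {
            return false; // reject before touching either queue
        }
        free_views.push(std::move(view));
        done_cbs.push(std::move(cb));
        return true;
    }

    // Dequeue pops from both queues together, preserving the pairing.
    std::pair<std::string, Callback> acquire()
    {
        auto view = std::move(free_views.front()); free_views.pop();
        auto cb = std::move(done_cbs.front());     done_cbs.pop();
        return {std::move(view), std::move(cb)};
    }
};

int main()
{
    ToyPool pool;
    pool.enqueue("buffer-0", [](int status) { std::cout << "done cb, status=" << status << "\n"; });
    auto [view, cb] = pool.acquire();
    std::cout << "acquired " << view << "\n";
    cb(0); // 0 standing in for HAILO_SUCCESS
}
```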
+ status = m_done_cbs.enqueue(exec_done, true); // we get here only if acquire_free_mem_view succeeded, so we want to push cb to keep sync between the queues CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -377,12 +318,12 @@ hailo_status BufferPool::enqueue_buffer(MemoryView mem_view, const TransferDoneC bool BufferPool::is_full() { - return (m_max_buffer_count - m_free_mem_views.size_approx() == 0); + return (m_max_buffer_count - num_of_buffers_in_pool() == 0); } size_t BufferPool::num_of_buffers_in_pool() { - return m_done_cbs.size_approx(); + return m_free_mem_views.size_approx(); } bool BufferPool::is_holding_user_buffers() @@ -390,57 +331,18 @@ bool BufferPool::is_holding_user_buffers() return m_is_holding_user_buffers; } -// This function changes the m_max_buffer_count to be num_of_buffers, and it must be called when pool is empty of buffers -hailo_status BufferPool::allocate_buffers(bool is_dma_able, size_t num_of_buffers) -{ - m_is_holding_user_buffers = false; - CHECK(m_free_mem_views.size_approx() == 0, HAILO_INTERNAL_FAILURE, "Cannot allocate buffers for pool, since pool is not empty!"); - m_max_buffer_count = num_of_buffers; - for (size_t i = 0; i < m_max_buffer_count; i++) { - BufferStorageParams buffer_storage_params; - if (is_dma_able) { - buffer_storage_params = BufferStorageParams::create_dma(); - } - auto buffer = Buffer::create(m_buffer_size, buffer_storage_params); - CHECK_EXPECTED_AS_STATUS(buffer); - - auto status = m_free_mem_views.enqueue(MemoryView(buffer.value())); - CHECK_SUCCESS(status); - m_buffers.emplace_back(buffer.release()); - } - return HAILO_SUCCESS; -} - Expected BufferPool::acquire_buffer(std::chrono::milliseconds timeout, bool ignore_shutdown_event) { - auto mem_view = acquire_free_mem_view(timeout, ignore_shutdown_event); - if ((HAILO_SUCCESS != mem_view.status()) && (m_is_holding_user_buffers)) { - auto done_cb = acquire_on_done_cb(timeout, true); - CHECK_EXPECTED(done_cb); - - done_cb.value()(mem_view.status()); - } - if (HAILO_SHUTDOWN_EVENT_SIGNALED == mem_view.status()) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_EXPECTED(mem_view); - - if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout, true); - CHECK_EXPECTED(done_cb); + m_is_already_running = true; - return PipelineBuffer(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); - } - - return PipelineBuffer(mem_view.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency); -} - -Expected> BufferPool::acquire_buffer_ptr(std::chrono::milliseconds timeout) -{ - auto mem_view = acquire_free_mem_view(timeout); + std::unique_lock lock(m_dequeue_mutex); + auto mem_view = acquire_free_mem_view(timeout, ignore_shutdown_event); if ((HAILO_SUCCESS != mem_view.status()) && (m_is_holding_user_buffers)) { - auto done_cb = acquire_on_done_cb(timeout, true); + auto done_cb = acquire_on_done_cb(timeout, ignore_shutdown_event); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) { + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } CHECK_EXPECTED(done_cb); done_cb.value()(mem_view.status()); @@ -450,18 +352,17 @@ Expected> BufferPool::acquire_buffer_ptr(std::ch } CHECK_EXPECTED(mem_view); - std::shared_ptr ptr = nullptr; if (m_is_holding_user_buffers) { - auto done_cb = acquire_on_done_cb(timeout, true); + auto done_cb = acquire_on_done_cb(timeout, true); // we get here only if acquire_free_mem_view succeeded, so we want to pop cb to keep sync between the 
queues
+        if (HAILO_SHUTDOWN_EVENT_SIGNALED == done_cb.status()) {
+            return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
+        }
         CHECK_EXPECTED(done_cb);
-        ptr = make_shared_nothrow<PipelineBuffer>(mem_view.release(), done_cb.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
-    } else {
-        ptr = make_shared_nothrow<PipelineBuffer>(mem_view.release(), m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
+        return PipelineBuffer(mem_view.release(), done_cb.release(), HAILO_SUCCESS, m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
     }
-    CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY);
-    return ptr;
+    return PipelineBuffer(mem_view.release(), [](hailo_status){}, HAILO_SUCCESS, m_is_holding_user_buffers, shared_from_this(), m_measure_vstream_latency);
 }
 
 Expected<MemoryView> BufferPool::acquire_free_mem_view(std::chrono::milliseconds timeout,
@@ -511,10 +412,13 @@ AccumulatorPtr BufferPool::get_queue_size_accumulator()
 
 Expected<PipelineBuffer> BufferPool::get_available_buffer(PipelineBuffer &&optional, std::chrono::milliseconds timeout)
 {
+    m_is_already_running = true;
+
     if (optional) {
-        CHECK_AS_EXPECTED(optional.size() == buffer_size(), HAILO_INVALID_OPERATION,
+        auto pool_buffer_size = buffer_size();
+        CHECK_AS_EXPECTED(optional.size() == pool_buffer_size, HAILO_INVALID_OPERATION,
             "Optional buffer size must be equal to pool buffer size. Optional buffer size = {}, buffer pool size = {}",
-            optional.size(), buffer_size());
+            optional.size(), pool_buffer_size);
         return std::move(optional);
     }
@@ -528,11 +432,31 @@
 hailo_status BufferPool::release_buffer(MemoryView mem_view)
 {
-    std::unique_lock<std::mutex> lock(m_release_buffer_mutex);
+    std::unique_lock<std::mutex> lock(m_enqueue_mutex);
     // This can be called after the shutdown event was signaled so we ignore it here
     return m_free_mem_views.enqueue(std::move(mem_view), true);
 }
 
+hailo_status BufferPool::map_to_vdevice(VDevice &vdevice, hailo_dma_buffer_direction_t direction)
+{
+    for (auto &buff : m_buffers) {
+        auto dma_mapped_buffer = DmaMappedBuffer::create(vdevice, buff.data(), buff.size(), direction);
+        CHECK_EXPECTED(dma_mapped_buffer);
+        m_dma_mapped_buffers.emplace_back(dma_mapped_buffer.release());
+    }
+    return HAILO_SUCCESS;
+}
+
+hailo_status BufferPool::set_buffer_size(uint32_t buffer_size)
+{
+    std::unique_lock<std::mutex> lock(m_buffer_size_mutex);
+    CHECK(!m_is_already_running, HAILO_INVALID_OPERATION,
+        "Setting the pool's buffer size after inference has started is not allowed");
+
+    m_buffer_size = buffer_size;
+    return HAILO_SUCCESS;
+}
+
 Expected<DurationCollector> DurationCollector::create(hailo_pipeline_elem_stats_flags_t flags,
     uint32_t num_frames_before_collection_start)
 {
@@ -629,7 +553,7 @@ const std::string &PipelineObject::name() const
 
 std::string PipelineObject::create_element_name(const std::string &element_name, const std::string &stream_name, uint8_t stream_index)
 {
     std::stringstream name;
-    name << element_name << static_cast<uint32_t>(stream_index) << "_" << stream_name;
+    name << element_name << static_cast<uint32_t>(stream_index) << stream_name;
     return name.str();
 }
 
@@ -719,11 +643,6 @@ hailo_status PipelinePad::dequeue_user_buffers(hailo_status error_status)
     return m_element.dequeue_user_buffers(error_status);
 }
 
-hailo_status PipelinePad::wait_for_finish()
-{
-    return m_element.wait_for_finish();
-}
-
 hailo_status PipelinePad::clear_abort()
 {
     return m_element.clear_abort();
@@ -873,22 +792,64 @@ std::string PipelineElement::description() const
     return element_description.str();
 }
 
-hailo_status
PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) +std::string PipelineElement::links_description() const +{ + std::stringstream element_base_description; + + element_base_description << "| inputs:"; + if ((!sinks().empty()) && (nullptr != sinks()[0].prev())) { + for(const auto &sink : sinks()) { + if (sink.prev()) { + element_base_description << " " << sink.prev()->element().name(); + } + } + } else { + element_base_description << " user"; + } + + element_base_description << " | outputs:"; + if ((!sources().empty()) && (nullptr != sources()[0].next())) { + for(const auto &source : sources()) { + if (source.next()) { + element_base_description << " " << source.next()->element().name(); + } + } + } else { + element_base_description << " user"; + } + + return element_base_description.str(); +} + +void PipelineElement::print_deep_description(std::vector &visited_elements) +{ + auto visited_node = find(visited_elements.begin(), visited_elements.end(), this->name()); + if (visited_elements.end() != visited_node) { + return; + } + + LOGGER__INFO("{} {}", this->name().c_str(), this->links_description().c_str()); + visited_elements.emplace_back(this->name()); + + for (auto &source : sources()) { + source.next()->element().print_deep_description(visited_elements); + } +} + +hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) { (void)mem_view; (void)exec_done; - (void)source_name; LOGGER__ERROR("enqueue_execution_buffer is not implemented for {}!", name()); return HAILO_NOT_IMPLEMENTED; }; -hailo_status PipelineElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done) -{ - return enqueue_execution_buffer(mem_view, exec_done, ""); -}; - hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout) { + if (!pool) { + return HAILO_SUCCESS; + } + if (!pool->is_holding_user_buffers()) { return HAILO_SUCCESS; } @@ -901,38 +862,17 @@ hailo_status PipelineElement::empty_buffer_pool(BufferPoolPtr pool, hailo_status return acquired_buffer.status(); } - auto exec_done_cb = acquired_buffer->get_exec_done_cb(); - exec_done_cb(error_status); + acquired_buffer->set_action_status(error_status); } return HAILO_SUCCESS; } -hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const uint32_t /*source_index*/) -{ - return HAILO_NOT_IMPLEMENTED; -} - -Expected PipelineElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected PipelineElement::can_push_buffer_downstream(const uint32_t /*source_index*/) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -hailo_status PipelineElement::fill_buffer_pool(bool /*is_dma_able*/, size_t /*num_of_buffers*/, const std::string &/*source_name*/) -{ - return HAILO_NOT_IMPLEMENTED; -} - -Expected PipelineElement::can_push_buffer_upstream(const std::string &/*source_name*/) +Expected PipelineElement::can_push_buffer_upstream() { return make_unexpected(HAILO_NOT_IMPLEMENTED); } -Expected PipelineElement::can_push_buffer_downstream(const std::string &/*source_name*/) +Expected PipelineElement::can_push_buffer_downstream() { return make_unexpected(HAILO_NOT_IMPLEMENTED); } @@ -982,11 +922,6 @@ hailo_status PipelineElement::dequeue_user_buffers(hailo_status error_status) return 
execute_dequeue_user_buffers(error_status); } -hailo_status PipelineElement::wait_for_finish() -{ - return execute_wait_for_finish(); -} - hailo_status PipelineElement::execute_activate() { return execute([&](auto *pad){ return pad->activate(); }); @@ -1033,11 +968,6 @@ hailo_status PipelineElement::execute_dequeue_user_buffers(hailo_status error_st return execute([&](auto *pad){ return pad->dequeue_user_buffers(error_status); }); } -hailo_status PipelineElement::execute_wait_for_finish() -{ - return execute([&](auto *pad){ return pad->wait_for_finish(); }); -} - hailo_status PipelineElement::execute(std::function func) { for (auto pad : execution_pads()) { diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp index 8689a7ba..67d7d2d8 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp @@ -14,6 +14,7 @@ #include "hailo/expected.hpp" #include "hailo/hailort.h" #include "hailo/runtime_statistics.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include "net_flow/ops/nms_post_process.hpp" #include "utils/thread_safe_queue.hpp" @@ -38,6 +39,7 @@ enum class BufferType UNINITIALIZED, VIEW, PIX_BUFFER, + DMA_BUFFER, }; using TransferDoneCallbackAsyncInfer = std::function; @@ -46,8 +48,7 @@ using PipelineTimePoint = std::chrono::steady_clock::time_point; #define BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT (std::chrono::milliseconds(10000)) #define DEFAULT_NUM_FRAMES_BEFORE_COLLECTION_START (100) -#define NUMBER_OF_PLANES_NV12_NV21 (2) -#define NUMBER_OF_PLANES_I420 (3) +class VDevice; struct AdditionalData {}; @@ -110,14 +111,13 @@ class PipelineBuffer final }; // Creates an empty PipelineBuffer (with no buffer/memory view) - PipelineBuffer(); PipelineBuffer(Type type); - PipelineBuffer(hailo_status status, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); - PipelineBuffer(MemoryView view, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); - PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done, - bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false, hailo_status status = HAILO_SUCCESS); - PipelineBuffer(hailo_pix_buffer_t buffer); - PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done); + // TODO HRT-12185: remove the option to pass a lambda as a parameter and save it as a member since it increases the memory consumption Significantly + PipelineBuffer(hailo_status action_status = HAILO_SUCCESS, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); + PipelineBuffer(MemoryView view, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}, + hailo_status action_status = HAILO_SUCCESS, bool is_user_buffer = true, BufferPoolPtr pool = nullptr, bool should_measure = false); + PipelineBuffer(hailo_pix_buffer_t buffer, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); + ~PipelineBuffer(); PipelineBuffer(const PipelineBuffer &) = delete; @@ -134,9 +134,9 @@ class PipelineBuffer final Metadata get_metadata() const; void set_metadata(Metadata &&val); void set_additional_data(std::shared_ptr data) { m_metadata.set_additional_data(data);} - TransferDoneCallbackAsyncInfer get_exec_done_cb(); hailo_status action_status(); void set_action_status(hailo_status status); + void call_exec_done(); private: Type m_type; @@ -166,23 +166,22 @@ class 
BufferPool : public std::enable_shared_from_this virtual ~BufferPool() = default; size_t buffer_size(); - hailo_status enqueue_buffer(MemoryView mem_view); - hailo_status enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done); - hailo_status allocate_buffers(bool is_dma_able, size_t num_of_buffers); + hailo_status enqueue_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done = [](hailo_status){}); Expected acquire_buffer(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); - Expected> acquire_buffer_ptr(std::chrono::milliseconds timeout); AccumulatorPtr get_queue_size_accumulator(); Expected get_available_buffer(PipelineBuffer &&optional, std::chrono::milliseconds timeout); bool is_full(); size_t num_of_buffers_in_pool(); bool is_holding_user_buffers(); + hailo_status map_to_vdevice(VDevice &vdevice, hailo_dma_buffer_direction_t direction); + hailo_status set_buffer_size(uint32_t buffer_size); private: Expected acquire_free_mem_view(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); Expected acquire_on_done_cb(std::chrono::milliseconds timeout, bool ignore_shutdown_event = false); hailo_status release_buffer(MemoryView mem_view); - const size_t m_buffer_size; + std::atomic m_buffer_size; bool m_is_holding_user_buffers; size_t m_max_buffer_count; const bool m_measure_vstream_latency; @@ -192,10 +191,20 @@ class BufferPool : public std::enable_shared_from_this // So when the pool has allocated buffers, it will hold them in the vector and have pointers to them in the queue. // And when the pool holds user buffers, the vector will be empty and only the queue will hold the user's buffers. std::vector m_buffers; + + // When m_buffers is not empty, and we need to pre-map the buffers to the vdevice, this vector will hold reference + // to the mapping objects. 
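For context on map_to_vdevice() in pipeline.cpp: DmaMappedBuffer pins and maps a host buffer for device DMA once, so per-frame transfers skip the map/unmap cost, and the mapping lives exactly as long as the DmaMappedBuffer object, which is why the pool keeps the mapping objects in the vector below. A hedged usage sketch against the public HailoRT API (error handling trimmed; the free function is ours, not part of the library):

```cpp
#include "hailo/hailort.hpp"
#include "hailo/dma_mapped_buffer.hpp"

using namespace hailort;

// Pre-map one pool buffer for host-to-device DMA, as map_to_vdevice() does
// for every buffer in the pool.
Expected<DmaMappedBuffer> premap_h2d(VDevice &vdevice, Buffer &pool_buffer)
{
    auto mapped = DmaMappedBuffer::create(vdevice, pool_buffer.data(),
        pool_buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (!mapped) {
        return make_unexpected(mapped.status());
    }
    // The caller must keep the returned object alive for as long as the
    // buffer is used for transfers; destroying it unmaps the buffer.
    return mapped.release();
}
```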
+    std::vector<DmaMappedBuffer> m_dma_mapped_buffers;
+
     SpscQueue<MemoryView> m_free_mem_views;
     SpscQueue<TransferDoneCallbackAsyncInfer> m_done_cbs;
     AccumulatorPtr m_queue_size_accumulator;
-    std::mutex m_release_buffer_mutex;
+    // We have separate enqueue and dequeue mutexes to allow MPMC access
+    std::mutex m_enqueue_mutex;
+    std::mutex m_dequeue_mutex;
+    std::mutex m_buffer_size_mutex;
+
+    std::atomic<bool> m_is_already_running;
 
     friend class PipelineBuffer;
 };
@@ -292,7 +301,6 @@ class PipelinePad final : public PipelineObject
     hailo_status abort();
     hailo_status terminate(hailo_status error_status);
     hailo_status dequeue_user_buffers(hailo_status error_status);
-    hailo_status wait_for_finish();
     hailo_status clear_abort();
     virtual hailo_status run_push(PipelineBuffer &&buffer);
     void run_push_async(PipelineBuffer &&buffer);
@@ -345,7 +353,6 @@ class PipelineElement : public PipelineObject
     hailo_status terminate(hailo_status error_status);
     hailo_status dequeue_user_buffers(hailo_status error_status);
     hailo_status clear_abort();
-    hailo_status wait_for_finish();
     AccumulatorPtr get_fps_accumulator();
     AccumulatorPtr get_latency_accumulator();
     bool is_terminating_element();
@@ -355,16 +362,13 @@ class PipelineElement : public PipelineObject
     const std::vector<PipelinePad> &sinks() const;
     const std::vector<PipelinePad> &sources() const;
     virtual std::string description() const;
+    std::string links_description() const;
+    void print_deep_description(std::vector<std::string> &visited_elements);
 
-    virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name);
-    hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done);
+    virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done);
     hailo_status empty_buffer_pool(BufferPoolPtr pool, hailo_status error_status, std::chrono::milliseconds timeout);
-    virtual Expected<bool> can_push_buffer_upstream(const uint32_t source_index = UINT32_MAX);
-    virtual Expected<bool> can_push_buffer_downstream(const uint32_t source_index = UINT32_MAX);
-    virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index = UINT32_MAX);
-    virtual Expected<bool> can_push_buffer_upstream(const std::string &source_name = "");
-    virtual Expected<bool> can_push_buffer_downstream(const std::string &source_name = "");
-    virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name = "");
+    virtual Expected<bool> can_push_buffer_upstream();
+    virtual Expected<bool> can_push_buffer_downstream();
     virtual Expected<uint32_t> get_source_index_from_source_name(const std::string &/*source_name*/)
     {
         // This function is overridden in multi-source elements
@@ -383,6 +387,16 @@
         return HAILO_INVALID_OPERATION;
     }
 
+    virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t /*max_accumulated_mask_size*/) {
+        return HAILO_INVALID_OPERATION;
+    }
+
+    virtual BufferPoolPtr get_buffer_pool() const
+    {
+        // This method should be overridden by elements with local pools
+        return nullptr;
+    }
+
 protected:
     DurationCollector m_duration_collector;
     std::shared_ptr<std::atomic<hailo_status>> m_pipeline_status;
@@ -405,7 +419,6 @@
     virtual hailo_status execute_terminate(hailo_status error_status);
     virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status);
     virtual hailo_status execute_clear_abort();
-    virtual hailo_status execute_wait_for_finish();
 
     virtual hailo_status execute(std::function<hailo_status(PipelinePad*)>);
 diff --git
a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp index 084ea3a1..6c945fbb 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp @@ -8,15 +8,13 @@ **/ #include "net_flow/pipeline/pipeline_internal.hpp" #include "net_flow/pipeline/async_infer_runner.hpp" -#include "common/os_utils.hpp" -#include "common/runtime_statistics_internal.hpp" namespace hailort { PipelineElementInternal::PipelineElementInternal(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + std::shared_ptr> &&pipeline_status, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : PipelineElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction), m_async_pipeline(async_pipeline) {} @@ -33,64 +31,9 @@ void PipelineElementInternal::handle_non_recoverable_async_error(hailo_status er } } -SourceElement::SourceElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); -} - -PipelinePad &SourceElement::source() -{ - return m_sources[0]; -} - -std::vector SourceElement::execution_pads() -{ - std::vector result{&source()}; - return result; -} - -SinkElement::SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); -} - -PipelinePad &SinkElement::sink() -{ - return m_sinks[0]; -} - -std::vector SinkElement::execution_pads() -{ - std::vector result{&sink()}; - return result; -} - -hailo_status SinkElement::execute_terminate(hailo_status /*error_status*/) -{ - return HAILO_SUCCESS; -} - -hailo_status SinkElement::execute_dequeue_user_buffers(hailo_status /*error_status*/) -{ - return HAILO_SUCCESS; -} - -hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = m_queue.clear(); - CHECK_SUCCESS(PipelineElement::execute_dequeue_user_buffers(error_status)); - return status; -} - IntermediateElement::IntermediateElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) : PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline) { m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); @@ -103,1467 +46,4 @@ std::vector IntermediateElement::execution_pads() return result; } -FilterElement::FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout, 
std::shared_ptr async_pipeline) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_pool(buffer_pool), - m_timeout(timeout) -{} - -hailo_status FilterElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - auto output = action(std::move(buffer), PipelineBuffer()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - return output.status(); - } - CHECK_EXPECTED_AS_STATUS(output); - - hailo_status status = next_pad().run_push(output.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -void FilterElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(m_pipeline_direction == PipelineDirection::PUSH); - if (HAILO_SUCCESS != buffer.action_status()) { - auto buffer_from_pool = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS != buffer_from_pool.status()) { - handle_non_recoverable_async_error(buffer_from_pool.status()); - } else { - buffer_from_pool->set_action_status(buffer.action_status()); - - auto exec_done_cb = buffer.get_exec_done_cb(); - exec_done_cb(buffer.action_status()); - - next_pad().run_push_async(buffer_from_pool.release()); - } - return; - } - - auto output = action(std::move(buffer), PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - next_pad().run_push_async(output.release()); - } else { - next_pad().run_push_async(PipelineBuffer(output.status())); - } - return; -} - -Expected FilterElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("run_pull in FilterElement was shutdown!"); - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - return action(buffer.release(), std::move(optional)); -} - -std::vector FilterElement::get_queue_size_accumulators() -{ - if (nullptr == m_pool || nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; -} - -hailo_status FilterElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status FilterElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = empty_buffer_pool(m_pool, error_status, m_timeout); - CHECK_SUCCESS(status); - return PipelineElement::execute_dequeue_user_buffers(error_status); -} - -Expected FilterElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return !m_pool->is_full(); -} - -hailo_status FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) -{ - auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected FilterElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status 
FilterElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -Expected> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event) -{ - auto queue = SpscQueue::create(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - return queue.release(); -} - -BaseQueueElement::BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_queue(std::move(queue)), - m_shutdown_event(shutdown_event), - m_timeout(timeout), - m_is_thread_running(true), - m_activation_event(std::move(activation_event)), - m_deactivation_event(std::move(deactivation_event)), - m_queue_size_accumulator(std::move(queue_size_accumulator)), - m_is_run_in_thread_running(false) -{} - -BaseQueueElement::~BaseQueueElement() -{ - LOGGER__INFO("Queue element {} has {} frames in his Queue on destruction", name(), m_queue.size_approx()); -} - -void BaseQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_activation_event.wait(INIFINITE_TIMEOUT()); - - if (!m_is_thread_running) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", this->name()); - break; - } - if (HAILO_SUCCESS == status) { - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = true; - } - m_cv.notify_all(); - - status = run_in_thread(); - - { - std::unique_lock lock(m_mutex); - m_is_run_in_thread_running = false; - } - m_cv.notify_all(); - } - - if (HAILO_SUCCESS != status) { - if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - // We do not want to log error for HAILO_STREAM_ABORTED_BY_USER - if (HAILO_STREAM_ABORTED_BY_USER != status) { - LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status); - } - - // Store the real error in pipeline_status - m_pipeline_status->store(status); - - // Signal other threads to stop - hailo_status shutdown_status = m_shutdown_event->signal(); - if (HAILO_SUCCESS != shutdown_status) { - LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status); - } - } - //Thread has done its execution. 
Mark to the thread to wait for activation again - hailo_status event_status = m_activation_event.reset(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed reset activation event of element {}, with status {}", this->name(), event_status); - } - - // Mark to deactivation function that the thread is done - event_status = m_deactivation_event.signal(); - if (HAILO_SUCCESS != event_status) { - LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status); - } - } - } - }); -} - -void BaseQueueElement::stop_thread() -{ - m_shutdown_event->signal(); - - // Mark thread as not running, then wake it in case it is waiting on m_activation_event - m_is_thread_running = false; - m_activation_event.signal(); - - if (m_thread.joinable()) { - m_thread.join(); - } -} - -std::vector BaseQueueElement::get_queue_size_accumulators() -{ - if (nullptr == m_queue_size_accumulator) { - return std::vector(); - } - return {m_queue_size_accumulator}; -} - -hailo_status BaseQueueElement::execute_activate() -{ - hailo_status status = PipelineElementInternal::execute_activate(); - CHECK_SUCCESS(status); - - status = m_activation_event.signal(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort) -{ - hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT()); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status); - } - - status = m_deactivation_event.reset(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to reset of deactivation event in {} with status {}", name(), status); - } - - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status BaseQueueElement::execute_clear() -{ - auto status = PipelineElementInternal::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); - } - - auto queue_status = m_queue.clear(); - CHECK_SUCCESS(queue_status, "Failed to clear() queue in {} with status {}", name(), status); - - return status; -} - -hailo_status BaseQueueElement::execute_wait_for_finish() -{ - std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] () { - return !m_is_run_in_thread_running; - }); - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name()); -} - -Expected BaseQueueElement::can_push_buffer_upstream(const uint32_t source_index) -{ - return m_sinks[0].prev()->element().can_push_buffer_upstream(source_index); -} - -Expected BaseQueueElement::can_push_buffer_downstream(const uint32_t /*source_index*/) -{ - return !m_queue.is_queue_full(); -} - -hailo_status BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) -{ - return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_index); -} - -Expected BaseQueueElement::can_push_buffer_upstream(const std::string &source_name) -{ - return m_sinks[0].prev()->element().can_push_buffer_upstream(source_name); -} - -Expected BaseQueueElement::can_push_buffer_downstream(const std::string &/*source_name*/) -{ - return !m_queue.is_queue_full(); -} - -hailo_status 
BaseQueueElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, source_name); -} - -hailo_status PushQueueElement::execute_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - m_pipeline_status->store(HAILO_STREAM_ABORTED_BY_USER); - - status = PipelineElementInternal::execute_abort(); - CHECK_SUCCESS(status); - - status = m_activation_event.signal(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseQueueElement::execute_clear_abort() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - m_pipeline_status->store(HAILO_SUCCESS); - return PipelineElementInternal::execute_clear_abort(); -} - -hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -std::string BaseQueueElement::description() const -{ - std::stringstream element_description; - - element_description << "(" << this->name(); - if (HAILO_INFINITE != this->m_timeout.count()) { - element_description << " | timeout: " << std::chrono::duration_cast(this->m_timeout).count() << "s"; - } - element_description << ")"; - - return element_description.str(); -} - -hailo_status BaseQueueElement::pipeline_status() -{ - auto status = m_pipeline_status->load(); - - // We treat HAILO_STREAM_ABORTED_BY_USER as success because it is caused by user action (aborting streams) - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return HAILO_SUCCESS; - } - return status; -} - -Expected> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) -{ - return PushQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, - 
vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction, async_pipeline); -} - -PushQueueElement::PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, bool should_start_thread) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline) -{ - if (should_start_thread) { - start_thread(); - } -} - -PushQueueElement::~PushQueueElement() -{ - stop_thread(); -} - -hailo_status PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto status = m_pipeline_status->load(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(m_pipeline_status->load()); - status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS(queue_thread_status, - "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - return HAILO_SUCCESS; -} - -void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) -{ - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status PushQueueElement::execute_deactivate() -{ - // Mark to the threads that deactivate() was called. - hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); - if (HAILO_SUCCESS != status) { - // We want to deactivate source even if enqueue failed - auto deactivation_status = PipelineElementInternal::execute_deactivate(); - CHECK_SUCCESS(deactivation_status); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { - LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); - } - else { - LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); - return status; - } - } - - return HAILO_SUCCESS; -} - -PipelinePad &PushQueueElement::next_pad() -{ - // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) - return *m_sources[0].next(); -} - -hailo_status PushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - // Return if deactivated - if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { - hailo_status status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - - status = next_pad().deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); - } - - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - hailo_status status = next_pad().run_push(buffer.release()); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction) -{ - return AsyncPushQueueElement::create(name, build_params.timeout, build_params.buffer_pool_size_edges, - build_params.elem_stats_flags, build_params.shutdown_event, build_params.pipeline_status, async_pipeline, pipeline_direction); -} - -AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PushQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), 
std::move(activation_event), std::move(deactivation_event), pipeline_direction, async_pipeline, false) -{ - start_thread(); -} - -void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - - auto status = m_queue.enqueue(std::move(buffer), m_timeout); - if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) { - handle_non_recoverable_async_error(status); - stop_thread(); - } -} - -void AsyncPushQueueElement::start_thread() -{ - m_thread = std::thread([this] () { - OsUtils::set_current_thread_name(thread_name()); - while (m_is_thread_running.load()) { - auto status = m_pipeline_status->load(); - if (HAILO_SUCCESS != status) { - LOGGER__INFO("Thread in element {} is not running anymore, exiting..", name()); - m_is_thread_running = false; - break; - } - - status = run_in_thread(); - if (HAILO_SUCCESS != status) { - handle_non_recoverable_async_error(status); - m_is_thread_running = false; - break; - } - } - }); -} - -hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -hailo_status AsyncPushQueueElement::run_in_thread() -{ - auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT()); - auto buffer_status = buffer.status(); - switch (buffer_status) { - case HAILO_SHUTDOWN_EVENT_SIGNALED: - break; - - case HAILO_SUCCESS: - // Return if deactivated - if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) { - hailo_status status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - - status = next_pad().deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status); - } - - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - - next_pad().run_push_async(buffer.release()); - break; - - default: - next_pad().run_push_async(PipelineBuffer(buffer_status)); - } - - return buffer_status; -} - -hailo_status AsyncPushQueueElement::execute_deactivate() -{ - // Mark to the threads that deactivate() was called. 
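The enqueue just below pushes a PipelineBuffer of type DEACTIVATE through the queue as a sentinel: because the queue is FIFO, the worker thread first drains every frame enqueued before the sentinel, and exits its loop only when it dequeues the sentinel itself, so nothing in flight is dropped on deactivation. A stripped-down sketch of the same pattern, using hypothetical Item/Worker types rather than the HailoRT classes:

```cpp
#include <condition_variable>
#include <iostream>
#include <mutex>
#include <queue>
#include <thread>

// Hypothetical queue item: either a data payload or a Deactivate sentinel.
struct Item {
    enum class Type { Data, Deactivate } type;
    int payload;
};

class Worker {
public:
    Worker() : m_thread([this] { run(); }) {}

    // Destructor enqueues the sentinel and joins; everything pushed before
    // the sentinel is still processed first (FIFO ordering).
    ~Worker() {
        push({Item::Type::Deactivate, 0});
        m_thread.join();
    }

    void push(Item item) {
        {
            std::lock_guard<std::mutex> lock(m_mutex);
            m_queue.push(item);
        }
        m_cv.notify_one();
    }

private:
    void run() {
        for (;;) {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_cv.wait(lock, [&] { return !m_queue.empty(); });
            Item item = m_queue.front();
            m_queue.pop();
            lock.unlock();
            if (item.type == Item::Type::Deactivate) {
                return; // sentinel observed: queue already drained, exit thread
            }
            std::cout << "processing " << item.payload << '\n';
        }
    }

    std::queue<Item> m_queue;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::thread m_thread; // declared last: members above exist before the thread starts
};

int main() {
    Worker w;
    w.push({Item::Type::Data, 1});
    w.push({Item::Type::Data, 2});
    // ~Worker pushes the Deactivate sentinel and joins the thread.
}
```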
- hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); - if (HAILO_SUCCESS != status) { - // We want to deactivate source even if enqueue failed - auto deactivation_status = PipelineElementInternal::execute_deactivate(); - CHECK_SUCCESS(deactivation_status); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { - LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); - } else { - LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); - return status; - } - } - - return HAILO_SUCCESS; -} - -hailo_status AsyncPushQueueElement::execute_post_deactivate(bool should_clear_abort) -{ - // We marked thread to stop with PipelineBuffer::Type::DEACTIVATE, now we wait for it to finish - stop_thread(); - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status) -{ - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - auto terminate_status = PipelineElement::execute_terminate(error_status); - - if ((!next_pad().element().is_terminating_element())) { - stop_thread(); - } - - CHECK_SUCCESS(terminate_status); - - return HAILO_SUCCESS; -} - -Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); - CHECK_EXPECTED(queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(queue.release(), shutdown_event, name, timeout, - duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), - activation_event.release(), deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} -Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -PullQueueElement::PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, 
PipelineDirection pipeline_direction) : - BaseQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), - std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), pipeline_direction, nullptr) -{ - start_thread(); -} - -PullQueueElement::~PullQueueElement() -{ - stop_thread(); -} - -hailo_status PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - LOGGER__ERROR("run_push_async is not supported for {}", name()); - assert(false); -} - -Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); - } - auto output = m_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - auto queue_thread_status = pipeline_status(); - CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, - "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), - queue_thread_status); - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_EXPECTED(output); - - return output; -} - -hailo_status PullQueueElement::execute_deactivate() -{ - hailo_status status = PipelineElementInternal::execute_deactivate(); - auto shutdown_event_status = m_shutdown_event->signal(); - CHECK_SUCCESS(status); - CHECK_SUCCESS(shutdown_event_status); - - return HAILO_SUCCESS; -} - -PipelinePad &PullQueueElement::next_pad() -{ - // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled) - return *m_sinks[0].prev(); -} - -hailo_status PullQueueElement::run_in_thread() -{ - auto buffer = next_pad().run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) { - LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name()); - return HAILO_NETWORK_GROUP_NOT_ACTIVATED; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction) -{ - auto pending_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(pending_buffer_queue); - - auto full_buffer_queue = BaseQueueElement::create_queue(1, shutdown_event); - CHECK_EXPECTED(full_buffer_queue); - - auto activation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(activation_event); - - auto deactivation_event = Event::create(Event::State::not_signalled); - CHECK_EXPECTED(deactivation_event); - - // TODO: Support fps/latency collection for queue elems (HRT-7711) - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - - AccumulatorPtr queue_size_accumulator = nullptr; - if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { - queue_size_accumulator = make_shared_nothrow>("queue_size"); - CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - auto queue_ptr = make_shared_nothrow(pending_buffer_queue.release(), - full_buffer_queue.release(), shutdown_event, name, timeout, duration_collector.release(), - std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(), - deactivation_event.release(), pipeline_direction); - CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name); - - LOGGER__INFO("Created {}", queue_ptr->name()); - - return queue_ptr; -} - -Expected> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.pipeline_elements_stats_flags, shutdown_event, pipeline_status, pipeline_direction); -} - -UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, - EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection 
pipeline_direction) : - PullQueueElement(std::move(queue), shutdown_event, name, timeout, std::move(duration_collector), - std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), - std::move(deactivation_event), - pipeline_direction), - m_full_buffer_queue(std::move(full_buffer_queue)) -{} - -Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - // TODO: Support fps/latency collection for queue elems (HRT-7711) - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); - - hailo_status status = m_queue.enqueue(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_SUCCESS_AS_EXPECTED(status); - - if (nullptr != m_queue_size_accumulator) { - m_queue_size_accumulator->add_data_point(static_cast(m_full_buffer_queue.size_approx())); - } - auto output = m_full_buffer_queue.dequeue(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", - name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(output); - - CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); - return output; -} - -hailo_status UserBufferQueueElement::execute_clear() -{ - auto status = PipelineElementInternal::execute_clear(); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status); - } - - auto queue_clear_status = m_full_buffer_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - queue_clear_status = m_queue.clear(); - if (HAILO_SUCCESS != queue_clear_status) { - LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status); - status = queue_clear_status; - } - - return status; -} - -hailo_status UserBufferQueueElement::run_in_thread() -{ - auto optional = m_queue.dequeue(INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { - LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_EXPECTED_AS_STATUS(optional); - - auto buffer = next_pad().run_pull(optional.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - if (HAILO_STREAM_ABORTED_BY_USER == buffer.status()) { - LOGGER__INFO("run_pull of {} was aborted!", name()); - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK_EXPECTED_AS_STATUS(buffer); - - hailo_status status = m_full_buffer_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); - return HAILO_SHUTDOWN_EVENT_SIGNALED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -BaseMuxElement::BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - 
DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pool(buffer_pool) -{ - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_sinks.reserve(sink_count); - m_sink_has_arrived.reserve(sink_count); - for (uint32_t i = 0; i < sink_count; ++i) { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_index_of_sink[m_sinks[i].name()] = i; - m_sink_has_arrived[m_sinks[i].name()] = false; - } -} - -std::vector BaseMuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - -hailo_status BaseMuxElement::execute_terminate(hailo_status error_status) -{ - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - auto terminate_status = PipelineElement::execute_terminate(error_status); - - if (!m_is_terminating_element) { - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - } - - CHECK_SUCCESS(terminate_status); - - return HAILO_SUCCESS; -} - - -hailo_status BaseMuxElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) -{ - return HAILO_INVALID_OPERATION; -} - -void BaseMuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - assert(m_next_pads.size() == 1); - - std::unique_lock lock(m_mutex); - - m_sink_has_arrived[sink.name()] = true; - m_input_buffers[sink.name()] = std::move(buffer); - if (has_all_sinks_arrived()) { - hailo_status all_buffers_status = HAILO_SUCCESS; - for (auto &input_buffer : m_input_buffers) { - if (HAILO_SUCCESS != input_buffer.second.action_status()) { - all_buffers_status = input_buffer.second.action_status(); - break; // error from one buffer is enough - } - } - - if (HAILO_SUCCESS != all_buffers_status) { - auto acquired_buffer = m_pool->get_available_buffer(PipelineBuffer(), m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(all_buffers_status); - - auto exec_done_cb = m_input_buffers[sink.name()].get_exec_done_cb(); - exec_done_cb(m_input_buffers[sink.name()].action_status()); - - m_next_pads[0]->run_push_async(acquired_buffer.release()); - } else { - handle_non_recoverable_async_error(acquired_buffer.status()); - } - } else { - std::vector input_buffers; - input_buffers.resize(m_input_buffers.size()); - for (auto &input_buffer : m_input_buffers) { - input_buffers[m_index_of_sink[input_buffer.first]] = std::move(input_buffer.second); - } - - auto output = action(std::move(input_buffers), PipelineBuffer()); - if (HAILO_SUCCESS == output.status()) { - m_next_pads[0]->run_push_async(output.release()); - } else { - 
m_next_pads[0]->run_push_async(PipelineBuffer(output.status())); - } - } - - for (const auto &curr_sink : m_sinks) { - m_sink_has_arrived[curr_sink.name()] = false; - } - m_input_buffers.clear(); - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - auto done = m_cv.wait_for(lock, m_timeout, [&](){ - if (m_pipeline_status->load() != HAILO_SUCCESS) { - return true; // so we can exit this flow - } - return !m_sink_has_arrived[sink.name()]; - }); - - if (!done) { - LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - handle_non_recoverable_async_error(HAILO_TIMEOUT); - } - - if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) { - lock.unlock(); - m_cv.notify_all(); - } - } -} - -bool BaseMuxElement::has_all_sinks_arrived() -{ - for (const auto &current_sink : m_sink_has_arrived) { - if (!current_sink.second) { - return false; - } - } - return true; -} -Expected BaseMuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_pull operation", name()); - std::vector inputs; - inputs.reserve(m_sinks.size()); - for (auto &sink : m_sinks) { - auto buffer = sink.prev()->run_pull(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - CHECK_EXPECTED(buffer); - - inputs.push_back(buffer.release()); - } - - auto output = action(std::move(inputs), std::move(optional)); - CHECK_EXPECTED(output); - - return output; -} - -hailo_status BaseMuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - (void)source_name; - auto status = m_pool->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseMuxElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - auto status = empty_buffer_pool(m_pool, error_status, m_timeout); - CHECK_SUCCESS(status); - return PipelineElement::execute_dequeue_user_buffers(error_status); -} - -Expected BaseMuxElement::can_push_buffer_upstream(const uint32_t /*source_index*/) -{ - return !m_pool->is_full(); -} - -hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/) -{ - auto status = m_pool->allocate_buffers(is_dma_able, num_of_buffers); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -Expected BaseMuxElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status BaseMuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -BaseDemuxElement::BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - PipelineElementInternal(name,
std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pools(pools), - m_is_activated(false), - m_was_stream_aborted(false), - m_source_name_to_index(), - m_was_source_called(source_count, false), - m_buffers_for_action() -{ - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - m_sources.reserve(source_count); - for (uint32_t i = 0; i < source_count; i++) { - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - m_source_name_to_index[m_sources[i].name()] = i; - } -} - -hailo_status BaseDemuxElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_push operation", name()); - - auto outputs = action(std::move(buffer)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return outputs.status(); - } - CHECK_EXPECTED_AS_STATUS(outputs); - - for (const auto &pad : execution_pads()) { - assert(m_source_name_to_index.count(pad->prev()->name()) > 0); - auto source_index = m_source_name_to_index[pad->prev()->name()]; - auto status = pad->run_push(std::move(outputs.value()[source_index])); - - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("run_push of {} was shutdown!", name()); - return status; - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("run_push of {} was aborted!", name()); - return status; - } - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -void BaseDemuxElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) -{ - assert(PipelineDirection::PUSH == m_pipeline_direction); - if (HAILO_SUCCESS != buffer.action_status()) { - for (const auto &pad : execution_pads()) { - auto source_index = m_source_name_to_index[pad->prev()->name()]; - auto acquired_buffer = m_pools[source_index]->acquire_buffer(m_timeout); - if (HAILO_SUCCESS == acquired_buffer.status()) { - acquired_buffer->set_action_status(buffer.action_status()); - - auto exec_done_cb = buffer.get_exec_done_cb(); - exec_done_cb(buffer.action_status()); - - pad->run_push_async(acquired_buffer.release()); - } else { - handle_non_recoverable_async_error(acquired_buffer.status()); - } - } - return; - } - - auto outputs = action(std::move(buffer)); - - for (const auto &pad : execution_pads()) { - assert(m_source_name_to_index.count(pad->prev()->name()) > 0); - auto source_index = m_source_name_to_index[pad->prev()->name()]; - if (HAILO_SUCCESS == outputs.status()) { - pad->run_push_async(std::move(outputs.value()[source_index])); - } else { - pad->run_push_async(PipelineBuffer(outputs.status())); - } - } -} - -Expected BaseDemuxElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) -{ - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "BaseDemuxElement {} does not support run_pull operation", name()); - - CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in demux element!"); - - std::unique_lock lock(m_mutex); - if (!m_is_activated) { - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - - if (m_was_stream_aborted) { - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - m_was_source_called[m_source_name_to_index[source.name()]] = true; - - if (were_all_srcs_arrived()) { - // If all srcs arrived, execute the demux - auto input = execution_pads()[0]->run_pull(); - if (HAILO_STREAM_ABORTED_BY_USER == input.status()) { - 
LOGGER__INFO("run_pull of demux element was aborted!"); - m_was_stream_aborted = true; - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(input.status()); - } - if (HAILO_SHUTDOWN_EVENT_SIGNALED == input.status()) { - return make_unexpected(input.status()); - } - CHECK_EXPECTED(input); - - auto outputs = action(input.release()); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == outputs.status()) { - return make_unexpected(outputs.status()); - } - CHECK_EXPECTED(outputs); - - m_buffers_for_action = outputs.release(); - - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - - // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again - lock.unlock(); - m_cv.notify_all(); - } else { - // If not all srcs arrived, wait until m_was_source_called is false (set to false after the demux execution) - auto wait_successful = m_cv.wait_for(lock, m_timeout, [&](){ - return !m_was_source_called[m_source_name_to_index[source.name()]] || m_was_stream_aborted || !m_is_activated; - }); - CHECK_AS_EXPECTED(wait_successful, HAILO_TIMEOUT, "Waiting for other threads in demux {} has reached a timeout (timeout={}ms)", name(), m_timeout.count()); - - if (m_was_stream_aborted) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_STREAM_ABORTED_BY_USER); - } - - // We check if the element is not activated in case notify_all() was called from deactivate() - if (!m_is_activated) { - lock.unlock(); - m_cv.notify_all(); - return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); - } - } - - assert(m_source_name_to_index[source.name()] < m_buffers_for_action.size()); - return std::move(m_buffers_for_action[m_source_name_to_index[source.name()]]); -} - -bool BaseDemuxElement::were_all_srcs_arrived() -{ - return std::all_of(m_was_source_called.begin(), m_was_source_called.end(), [](bool v) { return v; }); -} - -hailo_status BaseDemuxElement::execute_activate() -{ - if (m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = true;// TODO Should this always be true, no matter the status of source().activate()? - m_was_stream_aborted = false; - - return PipelineElementInternal::execute_activate(); -} - -hailo_status BaseDemuxElement::execute_deactivate() -{ - if (!m_is_activated) { - return HAILO_SUCCESS; - } - m_is_activated = false; - - // deactivate should be called before mutex acquire and notify_all because it is possible that all queues are waiting on - // the run_pull of the source (HwRead) and the mutex is already acquired so this would prevent a timeout error - hailo_status status = PipelineElementInternal::execute_deactivate(); - - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. 
- std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::execute_post_deactivate(bool should_clear_abort) -{ - for (uint32_t i = 0; i < m_was_source_called.size(); i++) { - m_was_source_called[i] = false; - } - return PipelineElementInternal::execute_post_deactivate(should_clear_abort); -} - -hailo_status BaseDemuxElement::execute_abort() -{ - auto status = PipelineElementInternal::execute_abort(); - CHECK_SUCCESS(status); - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::set_timeout(std::chrono::milliseconds timeout) -{ - m_timeout = timeout; - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) -{ - auto pool_id = m_source_name_to_index.at(source_name); - auto status = m_pools[pool_id]->enqueue_buffer(mem_view, exec_done); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status BaseDemuxElement::execute_dequeue_user_buffers(hailo_status error_status) -{ - for (auto &pool : m_pools) { - auto status = empty_buffer_pool(pool, error_status, m_timeout); - CHECK_SUCCESS(status); - } - return PipelineElement::execute_dequeue_user_buffers(error_status);; -} - -Expected BaseDemuxElement::can_push_buffer_upstream(const uint32_t source_index) -{ - CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); - return !m_pools[source_index]->is_full(); -} - -hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) -{ - CHECK(source_index < m_pools.size(), HAILO_INTERNAL_FAILURE); - CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers)); - return HAILO_SUCCESS; -} - -Expected BaseDemuxElement::can_push_buffer_upstream(const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED(source_index); - return can_push_buffer_upstream(*source_index); -} - -hailo_status BaseDemuxElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) -{ - auto source_index = get_source_index_from_source_name(source_name); - CHECK_EXPECTED_AS_STATUS(source_index); - return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index); -} - -Expected BaseDemuxElement::get_source_index_from_source_name(const std::string &source_name) -{ - CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND); - auto ret_val = m_source_name_to_index.at(source_name); - return ret_val; -} - -std::vector BaseDemuxElement::execution_pads() -{ - if (m_next_pads.size() == 0) - { - if (PipelineDirection::PUSH == m_pipeline_direction) { - m_next_pads.reserve(m_sources.size()); - for (auto &source : m_sources ) { - m_next_pads.push_back(source.next()); - } - } else { - m_next_pads.reserve(m_sinks.size()); - for (auto &sink : m_sinks ) { - m_next_pads.push_back(sink.prev()); - } - } - } - return m_next_pads; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp 
b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp index c699d74d..c904a59d 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp @@ -12,6 +12,8 @@ #include "net_flow/pipeline/pipeline.hpp" +#include "common/barrier.hpp" + namespace hailort { @@ -38,40 +40,12 @@ class PipelineElementInternal : public PipelineElement protected: void handle_non_recoverable_async_error(hailo_status error_status); + std::weak_ptr m_async_pipeline; friend class PipelinePad; }; - -// An element with one source pad only (generates data) -class SourceElement : public PipelineElementInternal -{ -public: - SourceElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - PipelinePad &source(); - -protected: - virtual std::vector execution_pads() override; -}; - -// An element with one sink pad only (consumes data) -class SinkElement : public PipelineElementInternal -{ -public: - SinkElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - PipelinePad &sink(); - -protected: - virtual std::vector execution_pads() override; - virtual hailo_status execute_terminate(hailo_status error_status) override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; -}; - // Transfers data from one pad to another pad. Has one sink pad and one source pad. class IntermediateElement : public PipelineElementInternal { @@ -85,283 +59,6 @@ class IntermediateElement : public PipelineElementInternal virtual std::vector execution_pads() override; }; -class FilterElement : public IntermediateElement -{ -public: - FilterElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - std::shared_ptr async_pipeline); - virtual ~FilterElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual std::vector get_queue_size_accumulators() override; - -protected: - // The optional buffer functions as an output buffer that the user can write to instead of acquiring a new buffer - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) = 0; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - BufferPoolPtr m_pool; - std::chrono::milliseconds m_timeout; -}; - -class BaseQueueElement : public IntermediateElement -{ -public: - virtual ~BaseQueueElement(); 
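The BaseQueueElement declaration being removed here documents, a few lines further down, that start_thread()/stop_thread() must be called from the subclass's ctor and dtor because run_in_thread() is pure virtual: a thread launched from the base constructor could invoke it before the derived object's vtable is in place, producing a pure virtual call. A hypothetical sketch of the hazard and the fix (illustrative names, not the removed classes themselves):

```cpp
#include <thread>

class BaseWorker {
public:
    virtual ~BaseWorker() = default;

protected:
    // If this were called from BaseWorker's own constructor, the new thread
    // could reach run_in_thread() while the object is still a BaseWorker,
    // i.e. before the derived vtable exists: a pure virtual call.
    void start_thread() { m_thread = std::thread([this] { run_in_thread(); }); }
    void stop_thread() {
        if (m_thread.joinable()) {
            m_thread.join();
        }
    }
    virtual void run_in_thread() = 0;

private:
    std::thread m_thread;
};

class DerivedWorker : public BaseWorker {
public:
    DerivedWorker() { start_thread(); }  // safe: vtable fully set up here
    ~DerivedWorker() { stop_thread(); }  // join before the derived part dies

private:
    void run_in_thread() override { /* consume the queue */ }
};

int main() {
    DerivedWorker w; // thread runs once and is joined in ~DerivedWorker
}
```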
- - hailo_status set_timeout(std::chrono::milliseconds timeout); - virtual std::string description() const override; - - static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); } - -protected: - static Expected> create_queue(size_t queue_size, EventPtr shutdown_event); - BaseQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, - Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - - hailo_status pipeline_status(); - - virtual hailo_status execute_activate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual Expected can_push_buffer_downstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual Expected can_push_buffer_downstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - /// Starts/stops the queue thread. This functions needs to be called on subclasses ctor and dtor - /// accordingly because otherwise, if we will start/stop thread in this class we will face pure-call - /// to `run_in_thread`. 
- /// This functions don't return status because they are meant to be called on ctor and dtor - virtual void start_thread(); - virtual void stop_thread(); - - virtual std::vector get_queue_size_accumulators() override; - - virtual hailo_status run_in_thread() = 0; - virtual std::string thread_name() = 0; - - SpscQueue m_queue; - EventPtr m_shutdown_event; - std::chrono::milliseconds m_timeout; - std::thread m_thread; - std::atomic_bool m_is_thread_running; - Event m_activation_event; - Event m_deactivation_event; - AccumulatorPtr m_queue_size_accumulator; - std::atomic_bool m_is_run_in_thread_running; - std::condition_variable m_cv; - std::mutex m_mutex; -}; - -class PushQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PUSH, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, std::shared_ptr async_pipeline = nullptr); - PushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, bool should_start_thread = true); - virtual ~PushQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - -protected: - virtual hailo_status execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PUSH_QUEUE"; }; - virtual hailo_status execute_abort() override; -}; - -class AsyncPushQueueElement : public PushQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction); - AsyncPushQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - -protected: - virtual hailo_status run_in_thread() 
override; - virtual std::string thread_name() override { return "ASYNC_PUSH_Q"; }; - virtual void start_thread() override; - virtual hailo_status execute_terminate(hailo_status error_status); - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_deactivate() override; -}; - -class PullQueueElement : public BaseQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - size_t queue_size, hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - PullQueueElement(SpscQueue &&queue, EventPtr shutdown_event, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - virtual ~PullQueueElement(); - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - -protected: - virtual hailo_status execute_deactivate() override; - virtual hailo_status run_in_thread() override; - virtual std::string thread_name() override { return "PULL_QUEUE"; }; -}; - -class UserBufferQueueElement : public PullQueueElement -{ -public: - static Expected> create(const std::string &name, std::chrono::milliseconds timeout, - hailo_pipeline_elem_stats_flags_t flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL); - UserBufferQueueElement(SpscQueue &&queue, SpscQueue &&full_buffer_queue, EventPtr shutdown_event, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, - std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, - PipelineDirection pipeline_direction); - - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - -protected: - virtual hailo_status execute_clear() override; - virtual hailo_status run_in_thread() override; - -private: - SpscQueue m_full_buffer_queue; -}; - -class BaseMuxElement : public PipelineElementInternal -{ -public: - virtual ~BaseMuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const 
uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - -protected: - BaseMuxElement(size_t sink_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual hailo_status execute_terminate(hailo_status error_status) override; - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) = 0; - virtual std::vector execution_pads() override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - BufferPoolPtr m_pool; - -private: - bool has_all_sinks_arrived(); - std::unordered_map m_sink_has_arrived; - std::mutex m_mutex; - std::unordered_map m_index_of_sink; - std::unordered_map m_input_buffers; - std::vector m_next_pads; - std::condition_variable m_cv; -}; - -class BaseDemuxElement : public PipelineElementInternal -{ -public: - virtual ~BaseDemuxElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - hailo_status set_timeout(std::chrono::milliseconds timeout); - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual Expected get_source_index_from_source_name(const std::string &source_name) override; - -protected: - BaseDemuxElement(size_t source_count, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::vector pools, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_abort() override; - virtual Expected> action(PipelineBuffer &&input) = 0; - virtual std::vector execution_pads() override; - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - std::vector m_pools; - -private: - bool were_all_srcs_arrived(); - - std::atomic_bool m_is_activated; - std::atomic_bool m_was_stream_aborted; - std::unordered_map m_source_name_to_index; - std::vector m_was_source_called; - std::vector m_buffers_for_action; - std::mutex m_mutex; - std::condition_variable m_cv; - std::vector m_next_pads; -}; - enum class AccumulatorType { FPS, diff --git 
a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
new file mode 100644
index 00000000..3527fde9
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp
@@ -0,0 +1,842 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file queue_elements.cpp
+ * @brief Implementation of the queue elements
+ **/
+
+#include "net_flow/pipeline/vstream_internal.hpp"
+#include "net_flow/pipeline/queue_elements.hpp"
+#include "common/os_utils.hpp"
+#include "common/runtime_statistics_internal.hpp"
+
+namespace hailort
+{
+
+Expected<SpscQueue<PipelineBuffer>> BaseQueueElement::create_queue(size_t queue_size, EventPtr shutdown_event)
+{
+    auto queue = SpscQueue<PipelineBuffer>::create(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    return queue.release();
+}
+
+BaseQueueElement::BaseQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+    std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline) :
+    IntermediateElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline),
+    m_queue(std::move(queue)),
+    m_shutdown_event(shutdown_event),
+    m_timeout(timeout),
+    m_is_thread_running(true),
+    m_activation_event(std::move(activation_event)),
+    m_deactivation_event(std::move(deactivation_event)),
+    m_queue_size_accumulator(std::move(queue_size_accumulator)),
+    m_pool(buffer_pool)
+{}
+
+BaseQueueElement::~BaseQueueElement()
+{
+    LOGGER__INFO("Queue element {} has {} frames in its queue on destruction", name(), m_queue.size_approx());
+}
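`start_thread()` below implements an event-driven worker: the thread parks on an activation event, runs `run_in_thread()` once activated, and on failure stores the error, signals shutdown, and re-arms itself for the next activation. A condensed, self-contained sketch of that protocol, with `std::condition_variable` standing in for hailort's `Event` (an assumption made for illustration):

```cpp
#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

class WorkerSketch {
public:
    void start() {
        m_thread = std::thread([this] {
            while (m_running) {
                wait_for_activation();          // parks until activate() or stop()
                if (!m_running) {
                    break;
                }
                while (m_running && step()) {}  // ~ the run_in_thread() loop
                deactivate();                   // re-arm: wait for the next activation
            }
        });
    }

    void activate() {
        { std::lock_guard<std::mutex> lock(m_mutex); m_activated = true; }
        m_cv.notify_one();
    }

    void stop() {
        m_running = false;
        activate();                             // wake the thread so it can exit
        if (m_thread.joinable()) {
            m_thread.join();
        }
    }

private:
    bool step() { return false; }               // one unit of work; false = stop
    void wait_for_activation() {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_cv.wait(lock, [this] { return m_activated || !m_running; });
    }
    void deactivate() {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_activated = false;
    }

    std::thread m_thread;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    std::atomic_bool m_running{true};
    bool m_activated = false;
};
```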
+void BaseQueueElement::start_thread()
+{
+    m_thread = std::thread([this] () {
+        OsUtils::set_current_thread_name(thread_name());
+        while (m_is_thread_running.load()) {
+            auto status = m_activation_event.wait(INIFINITE_TIMEOUT());
+
+            if (!m_is_thread_running) {
+                LOGGER__INFO("Thread in element {} is not running anymore, exiting...", this->name());
+                break;
+            }
+            if (HAILO_SUCCESS == status) {
+                status = run_in_thread();
+            }
+
+            if (HAILO_SUCCESS != status) {
+                if (HAILO_SHUTDOWN_EVENT_SIGNALED != status) {
+                    // We do not want to log an error for HAILO_STREAM_ABORT
+                    if (HAILO_STREAM_ABORT != status) {
+                        LOGGER__ERROR("Queue element {} run in thread function failed! status = {}", this->name(), status);
+                    }
+
+                    // Store the real error in pipeline_status
+                    m_pipeline_status->store(status);
+                }
+                // Signal other threads to stop
+                hailo_status shutdown_status = m_shutdown_event->signal();
+                if (HAILO_SUCCESS != shutdown_status) {
+                    LOGGER__CRITICAL("Failed shutting down queue with status {}", shutdown_status);
+                }
+
+                // Thread has done its execution. Mark the thread to wait for activation again
+                hailo_status event_status = m_activation_event.reset();
+                if (HAILO_SUCCESS != event_status) {
+                    LOGGER__CRITICAL("Failed to reset activation event of element {}, with status {}", this->name(), event_status);
+                }
+
+                // Mark to the deactivation function that the thread is done
+                event_status = m_deactivation_event.signal();
+                if (HAILO_SUCCESS != event_status) {
+                    LOGGER__CRITICAL("Failed signaling deactivation event of element {}, with status {}", this->name(), event_status);
+                }
+            }
+        }
+    });
+}
+
+void BaseQueueElement::stop_thread()
+{
+    m_shutdown_event->signal();
+
+    // Mark thread as not running, then wake it in case it is waiting on m_activation_event
+    m_is_thread_running = false;
+    m_activation_event.signal();
+
+    if (m_thread.joinable()) {
+        m_thread.join();
+    }
+}
+
+std::vector<AccumulatorPtr> BaseQueueElement::get_queue_size_accumulators()
+{
+    if (nullptr == m_queue_size_accumulator) {
+        return std::vector<AccumulatorPtr>();
+    }
+    return {m_queue_size_accumulator};
+}
+
+hailo_status BaseQueueElement::execute_activate()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    status = PipelineElementInternal::execute_activate();
+    CHECK_SUCCESS(status);
+
+    status = m_deactivation_event.reset();
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to reset deactivation event in {} with status {}", name(), status);
+    }
+
+    status = m_activation_event.signal();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status BaseQueueElement::execute_post_deactivate(bool should_clear_abort)
+{
+    hailo_status status = m_deactivation_event.wait(INIFINITE_TIMEOUT());
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to post_deactivate() in {} with status {}", name(), status);
+    }
+
+    return PipelineElementInternal::execute_post_deactivate(should_clear_abort);
+}
+
+hailo_status BaseQueueElement::execute_clear()
+{
+    auto status = PipelineElementInternal::execute_clear();
+    if (HAILO_SUCCESS != status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), status);
+    }
+
+    auto queue_clear_status = m_queue.clear();
+    if (HAILO_SUCCESS != queue_clear_status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), queue_clear_status);
+        status = queue_clear_status;
+    }
+
+    auto pool_clear_status = empty_buffer_pool(m_pool, HAILO_SUCCESS, BUFFER_POOL_DEFAULT_QUEUE_TIMEOUT);
+    if (HAILO_SUCCESS != pool_clear_status) {
+        LOGGER__ERROR("Failed to clear() in {} with status {}", name(), pool_clear_status);
+        status = pool_clear_status;
+    }
+
+    return status;
+}
+
+hailo_status PushQueueElement::execute_abort()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    m_pipeline_status->store(HAILO_STREAM_ABORT);
+
+    status = PipelineElementInternal::execute_abort();
+    CHECK_SUCCESS(status);
+
+    status = m_activation_event.signal();
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status BaseQueueElement::execute_clear_abort()
+{
+    auto status = m_shutdown_event->reset();
+    CHECK_SUCCESS(status);
+
+    m_pipeline_status->store(HAILO_SUCCESS);
+    return PipelineElementInternal::execute_clear_abort();
+}
+
+hailo_status BaseQueueElement::set_timeout(std::chrono::milliseconds timeout)
+{
+    m_timeout = timeout;
+    return HAILO_SUCCESS;
+}
+
+std::string BaseQueueElement::description() const
+{
+    std::stringstream element_description;
+
+    element_description << "(" << this->name();
+    if (HAILO_INFINITE != this->m_timeout.count()) {
+        element_description << " | timeout: " << std::chrono::duration_cast<std::chrono::seconds>(this->m_timeout).count() << "s";
+    }
+    element_description << ")";
+
+    return element_description.str();
+}
+hailo_status BaseQueueElement::pipeline_status()
+{
+    auto status = m_pipeline_status->load();
+
+    // We treat HAILO_STREAM_ABORT as success because it is caused by user action (aborting streams)
+    if (HAILO_STREAM_ABORT == status) {
+        return HAILO_SUCCESS;
+    }
+    return status;
+}
+
+Expected<std::shared_ptr<PushQueueElement>> PushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vs_flags,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status, std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vs_flags);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<PushQueueElement>(queue.release(), buffer_pool.release(), shutdown_event, name, timeout,
+        duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status),
+        activation_event.release(), deactivation_event.release(), async_pipeline, true);
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PushQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
+
+Expected<std::shared_ptr<PushQueueElement>> PushQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+    size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+    std::shared_ptr<AsyncPipeline> async_pipeline)
+{
+    return PushQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size,
+        frame_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags,
+        pipeline_status, async_pipeline);
+}
+
+PushQueueElement::PushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+    std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    std::shared_ptr<AsyncPipeline> async_pipeline, bool should_start_thread) :
+    BaseQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator),
+        std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), PipelineDirection::PUSH, async_pipeline)
+{
+    if (should_start_thread) {
+        start_thread();
+    }
+}
+
+PushQueueElement::~PushQueueElement()
+{
+    stop_thread();
+}
+
+hailo_status
PushQueueElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/) +{ + auto status = m_pipeline_status->load(); + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("run_push of {} was aborted!", name()); + return status; + } + CHECK_SUCCESS(m_pipeline_status->load()); + + if (nullptr != m_queue_size_accumulator) { + m_queue_size_accumulator->add_data_point(static_cast(m_queue.size_approx())); + } + status = m_queue.enqueue(std::move(buffer), m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS(queue_thread_status, + "Shutdown event was signaled in enqueue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + return HAILO_SUCCESS; +} + +void PushQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) { + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PushQueueElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) +{ + return make_unexpected(HAILO_INVALID_OPERATION); +} + +hailo_status PushQueueElement::execute_deactivate() +{ + // Mark to the threads that deactivate() was called. + hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORT == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } + else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +PipelinePad &PushQueueElement::next_pad() +{ + // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed)
+    return *m_sources[0].next();
+}
+
+hailo_status PushQueueElement::run_in_thread()
+{
+    auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT());
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_EXPECTED_AS_STATUS(buffer);
+
+    // Return if deactivated
+    if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) {
+        hailo_status status = m_shutdown_event->signal();
+        CHECK_SUCCESS(status);
+
+        status = next_pad().deactivate();
+        if (HAILO_SUCCESS != status) {
+            LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status);
+        }
+
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+
+    hailo_status status = next_pad().run_push(buffer.release());
+    if (HAILO_STREAM_ABORT == status) {
+        LOGGER__INFO("run_push of {} was aborted!", name());
+        return status;
+    }
+    else if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("run_push of {} stopped because Shutdown event was signaled!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<AsyncPushQueueElement>> AsyncPushQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    size_t queue_size, size_t frame_size, bool is_empty, bool interacts_with_hw, hailo_pipeline_elem_stats_flags_t flags,
+    hailo_vstream_stats_flags_t vstream_stats_flags, EventPtr shutdown_event,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status, std::shared_ptr<AsyncPipeline> async_pipeline, bool is_entry)
+{
+    if (is_entry) {
+        // Multiplying by 2 to ensure dual-buffering when the edge element is the bottleneck
+        queue_size = queue_size * 2;
+    }
+
+    auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags, is_empty, interacts_with_hw);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<AsyncPushQueueElement>(queue.release(), buffer_pool.release(),
+        shutdown_event, name, timeout, duration_collector.release(), std::move(queue_size_accumulator),
+        std::move(pipeline_status), activation_event.release(), deactivation_event.release(), async_pipeline);
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating AsyncPushQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
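Both `create()` overloads hand the same shutdown event to the queue and the buffer pool, so a single signal unblocks every waiter in the element. A rough sketch of the queue semantics the code above relies on, where a blocking dequeue can end in "item", "shutdown", or "timeout" (illustrative names; the real `SpscQueue<PipelineBuffer>` is a single-producer single-consumer queue tied to hailort's `EventPtr`):

```cpp
#include <chrono>
#include <condition_variable>
#include <deque>
#include <mutex>

enum class DequeueResult { Item, Shutdown, Timeout };

template <typename T>
class InterruptibleQueueSketch {
public:
    explicit InterruptibleQueueSketch(size_t capacity) : m_capacity(capacity) {}

    // Returns false if the queue stayed full until the timeout or was shut down,
    // loosely mirroring HAILO_TIMEOUT / HAILO_SHUTDOWN_EVENT_SIGNALED.
    bool enqueue(T item, std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_cv.wait_for(lock, timeout, [this] { return m_shutdown || (m_items.size() < m_capacity); })) {
            return false;
        }
        if (m_shutdown) {
            return false;
        }
        m_items.push_back(std::move(item));
        m_cv.notify_all();
        return true;
    }

    DequeueResult dequeue(T &out, std::chrono::milliseconds timeout) {
        std::unique_lock<std::mutex> lock(m_mutex);
        if (!m_cv.wait_for(lock, timeout, [this] { return m_shutdown || !m_items.empty(); })) {
            return DequeueResult::Timeout;
        }
        if (m_shutdown) {
            return DequeueResult::Shutdown; // one signal releases all waiters
        }
        out = std::move(m_items.front());
        m_items.pop_front();
        m_cv.notify_all();
        return DequeueResult::Item;
    }

    void shutdown() {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_shutdown = true;
        m_cv.notify_all();
    }

private:
    size_t m_capacity;
    std::deque<T> m_items;
    std::mutex m_mutex;
    std::condition_variable m_cv;
    bool m_shutdown = false;
};
```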
+Expected<std::shared_ptr<AsyncPushQueueElement>> AsyncPushQueueElement::create(const std::string &name, const ElementBuildParams &build_params,
+    size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr<AsyncPipeline> async_pipeline, bool is_entry)
+{
+    // Pools that interact with HW should be as big as the edge pools (user buffers)
+    auto queue_size = (interacts_with_hw) ? build_params.buffer_pool_size_edges : build_params.buffer_pool_size_internal;
+    return AsyncPushQueueElement::create(name, build_params.timeout, queue_size, frame_size, is_empty, interacts_with_hw,
+        build_params.elem_stats_flags, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, async_pipeline,
+        is_entry);
+}
+
+AsyncPushQueueElement::AsyncPushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event,
+    const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+    std::shared_ptr<AsyncPipeline> async_pipeline) :
+    PushQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator),
+        std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), async_pipeline, false)
+{
+    start_thread();
+}
+
+void AsyncPushQueueElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+{
+    // We do not measure duration for Q elements
+    if (nullptr != m_queue_size_accumulator) {
+        m_queue_size_accumulator->add_data_point(static_cast<double>(m_queue.size_approx()));
+    }
+
+    auto status = m_queue.enqueue(std::move(buffer), m_timeout);
+    if (HAILO_SUCCESS != status && HAILO_SHUTDOWN_EVENT_SIGNALED != status) {
+        handle_non_recoverable_async_error(status);
+        stop_thread();
+    }
+}
+
+void AsyncPushQueueElement::start_thread()
+{
+    m_thread = std::thread([this] () {
+        OsUtils::set_current_thread_name(thread_name());
+        while (m_is_thread_running.load()) {
+            auto status = m_pipeline_status->load();
+            if (HAILO_SUCCESS != status) {
+                LOGGER__INFO("Thread in element {} is not running anymore, exiting...", name());
+                m_is_thread_running = false;
+                break;
+            }
+
+            status = run_in_thread();
+            if (HAILO_SUCCESS != status) {
+                handle_non_recoverable_async_error(status);
+                m_is_thread_running = false;
+                break;
+            }
+        }
+    });
+}
+
+hailo_status AsyncPushQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+{
+    return HAILO_INVALID_OPERATION;
+}
+
+hailo_status AsyncPushQueueElement::run_in_thread()
+{
+    auto buffer = m_queue.dequeue(INIFINITE_TIMEOUT());
+    auto buffer_status = buffer.status();
+    switch (buffer_status) {
+    case HAILO_SHUTDOWN_EVENT_SIGNALED:
+        break;
+
+    case HAILO_SUCCESS:
+        // Return if deactivated
+        if (PipelineBuffer::Type::DEACTIVATE == buffer->get_type()) {
+            hailo_status status = m_shutdown_event->signal();
+            CHECK_SUCCESS(status);
+
+            status = next_pad().deactivate();
+            if (HAILO_SUCCESS != status) {
+                LOGGER__ERROR("Deactivate of source in {} has failed with status {}", name(), status);
+            }
+
+            return HAILO_SHUTDOWN_EVENT_SIGNALED;
+        }
+
+        next_pad().run_push_async(buffer.release());
+        break;
+
+    default:
+        next_pad().run_push_async(PipelineBuffer(buffer_status));
+    }
+
+    return buffer_status;
+}
+
+hailo_status AsyncPushQueueElement::execute_deactivate()
+{
+    // Mark to the threads that deactivate() was called.
+ hailo_status status = m_queue.enqueue(PipelineBuffer(PipelineBuffer::Type::DEACTIVATE)); + if (HAILO_SUCCESS != status) { + // We want to deactivate source even if enqueue failed + auto deactivation_status = PipelineElementInternal::execute_deactivate(); + CHECK_SUCCESS(deactivation_status); + if ((HAILO_STREAM_ABORT == status) || (HAILO_SHUTDOWN_EVENT_SIGNALED == status)) { + LOGGER__INFO("enqueue() in element {} was aborted, got status = {}", name(), status); + } else { + LOGGER__ERROR("enqueue() in element {} failed, got status = {}", name(), status); + return status; + } + } + + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_post_deactivate(bool should_clear_abort) +{ + // We marked thread to stop with PipelineBuffer::Type::DEACTIVATE, now we wait for it to finish + stop_thread(); + return PipelineElementInternal::execute_post_deactivate(should_clear_abort); +} + +hailo_status AsyncPushQueueElement::execute_terminate(hailo_status error_status) +{ + if (m_is_terminated) { + return HAILO_SUCCESS; + } + + auto terminate_status = PipelineElement::execute_terminate(error_status); + + if ((!next_pad().element().is_terminating_element())) { + stop_thread(); + } + + CHECK_SUCCESS(terminate_status); + + return HAILO_SUCCESS; +} + +hailo_status AsyncPushQueueElement::execute_dequeue_user_buffers(hailo_status error_status) +{ + auto dequeue_status = PipelineElement::execute_dequeue_user_buffers(error_status); + auto clear_queues_status = m_queue.clear(); + auto empty_pool_status = empty_buffer_pool(m_pool, error_status, m_timeout); + + CHECK_SUCCESS(dequeue_status); + CHECK_SUCCESS(clear_queues_status); + CHECK_SUCCESS(empty_pool_status); + return HAILO_SUCCESS; +} + +Expected AsyncPushQueueElement::can_push_buffer_downstream() +{ + return !m_queue.is_queue_full(); +} + +Expected> PullQueueElement::create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status) +{ + auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); + CHECK_EXPECTED(shutdown_event_exp); + auto shutdown_event = shutdown_event_exp.release(); + + auto queue = BaseQueueElement::create_queue(queue_size, shutdown_event); + CHECK_EXPECTED(queue); + + auto activation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(activation_event); + + auto deactivation_event = Event::create(Event::State::not_signalled); + CHECK_EXPECTED(deactivation_event); + + // We do not measure duration for Q elements + auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); + CHECK_EXPECTED(duration_collector); + + AccumulatorPtr queue_size_accumulator = nullptr; + if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) { + queue_size_accumulator = make_shared_nothrow>("queue_size"); + CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY); + } + + auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags); + CHECK_EXPECTED(buffer_pool); + + auto queue_ptr = make_shared_nothrow(queue.release(), buffer_pool.release(), shutdown_event, + name, timeout, duration_collector.release(), std::move(queue_size_accumulator), std::move(pipeline_status), + activation_event.release(), deactivation_event.release()); + CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating PullQueueElement {} failed!", name); + + 
LOGGER__INFO("Created {}", queue_ptr->description()); + + return queue_ptr; +} +Expected> PullQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status) +{ + return PullQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms), + vstream_params.queue_size, frame_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, + pipeline_status); +} + +PullQueueElement::PullQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, + const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event) : + BaseQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), std::move(queue_size_accumulator), + std::move(pipeline_status), std::move(activation_event), std::move(deactivation_event), PipelineDirection::PULL, nullptr) +{ + start_thread(); +} + +PullQueueElement::~PullQueueElement() +{ + stop_thread(); +} + +hailo_status PullQueueElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + return HAILO_INVALID_OPERATION; +} + +void PullQueueElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/) +{ + LOGGER__ERROR("run_push_async is not supported for {}", name()); + assert(false); +} + +Expected PullQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*sink*/) +{ + // We do not measure duration for Q elements + CHECK_AS_EXPECTED(!optional, HAILO_INVALID_ARGUMENT, "Optional buffer is not allowed in queue element!"); + + auto output = m_queue.dequeue(m_timeout); + + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + auto queue_thread_status = pipeline_status(); + CHECK_SUCCESS_AS_EXPECTED(queue_thread_status, + "Shutdown event was signaled in dequeue of queue element {} because thread has failed with status={}!", name(), + queue_thread_status); + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_EXPECTED(output); + + return output; +} + +hailo_status PullQueueElement::execute_deactivate() +{ + hailo_status status = PipelineElementInternal::execute_deactivate(); + auto shutdown_event_status = m_shutdown_event->signal(); + CHECK_SUCCESS(status); + CHECK_SUCCESS(shutdown_event_status); + + return HAILO_SUCCESS; +} + +PipelinePad &PullQueueElement::next_pad() +{ + // Note: The next elem to be run is upstream from this elem (i.e. 
buffers are pulled)
+    return *m_sinks[0].prev();
+}
+
+hailo_status PullQueueElement::run_in_thread()
+{
+    auto buffer = next_pad().run_pull();
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
+        LOGGER__INFO("Shutdown event was signaled in run_pull of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    if (HAILO_STREAM_ABORT == buffer.status()) {
+        LOGGER__INFO("run_pull of queue element {} was aborted!", name());
+        return HAILO_STREAM_ABORT;
+    }
+    if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == buffer.status()) {
+        LOGGER__INFO("run_pull of queue element {} was called before network_group is activated!", name());
+        return HAILO_NETWORK_GROUP_NOT_ACTIVATED;
+    }
+    CHECK_EXPECTED_AS_STATUS(buffer);
+
+    if (nullptr != m_queue_size_accumulator) {
+        m_queue_size_accumulator->add_data_point(static_cast<double>(m_queue.size_approx()));
+    }
+
+    hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT());
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name());
+        return HAILO_SHUTDOWN_EVENT_SIGNALED;
+    }
+    CHECK_SUCCESS(status);
+
+    return HAILO_SUCCESS;
+}
+
+Expected<std::shared_ptr<UserBufferQueueElement>> UserBufferQueueElement::create(const std::string &name, std::chrono::milliseconds timeout,
+    hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, size_t frame_size,
+    std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+    auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled);
+    CHECK_EXPECTED(shutdown_event_exp);
+    auto shutdown_event = shutdown_event_exp.release();
+
+    const auto queue_size = 1;
+    auto pending_buffer_queue = BaseQueueElement::create_queue(queue_size, shutdown_event);
+    CHECK_EXPECTED(pending_buffer_queue);
+
+    auto activation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(activation_event);
+
+    auto deactivation_event = Event::create(Event::State::not_signalled);
+    CHECK_EXPECTED(deactivation_event);
+
+    // We do not measure duration for Q elements
+    auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE);
+    CHECK_EXPECTED(duration_collector);
+
+    AccumulatorPtr queue_size_accumulator = nullptr;
+    if ((flags & HAILO_PIPELINE_ELEM_STATS_MEASURE_QUEUE_SIZE) != 0) {
+        queue_size_accumulator = make_shared_nothrow<FullAccumulator<double>>("queue_size");
+        CHECK_AS_EXPECTED(nullptr != queue_size_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    auto is_empty = true; // UserBufferQueue always holds user buffers, therefore it's created empty
+    auto is_dma_able = false;
+    auto buffer_pool = BufferPool::create(frame_size, queue_size, shutdown_event, flags, vstream_stats_flags, is_empty, is_dma_able);
+    CHECK_EXPECTED(buffer_pool);
+
+    auto queue_ptr = make_shared_nothrow<UserBufferQueueElement>(pending_buffer_queue.release(),
+        buffer_pool.release(), shutdown_event, name, timeout, duration_collector.release(),
+        std::move(queue_size_accumulator), std::move(pipeline_status), activation_event.release(),
+        deactivation_event.release());
+    CHECK_AS_EXPECTED(nullptr != queue_ptr, HAILO_OUT_OF_HOST_MEMORY, "Creating UserBufferQueueElement {} failed!", name);
+
+    LOGGER__INFO("Created {}", queue_ptr->description());
+
+    return queue_ptr;
+}
+
+Expected<std::shared_ptr<UserBufferQueueElement>> UserBufferQueueElement::create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+    size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status)
+{
+    return UserBufferQueueElement::create(name, std::chrono::milliseconds(vstream_params.timeout_ms),
+        vstream_params.pipeline_elements_stats_flags,
vstream_params.vstream_stats_flags, frame_size, pipeline_status); +} + +UserBufferQueueElement::UserBufferQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, + EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, + DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, + Event &&activation_event, Event &&deactivation_event) : + PullQueueElement(std::move(queue), buffer_pool, shutdown_event, name, timeout, std::move(duration_collector), + std::move(queue_size_accumulator), std::move(pipeline_status), std::move(activation_event), + std::move(deactivation_event)) +{} + +Expected UserBufferQueueElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/) +{ + CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be valid in {}!", name()); + + hailo_status status = m_pool->enqueue_buffer(optional.as_view()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + CHECK_SUCCESS_AS_EXPECTED(status); + + auto output = m_queue.dequeue(m_timeout); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == output.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of queue element {}!", name()); + return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED); + } + + CHECK_AS_EXPECTED(HAILO_TIMEOUT != output.status(), HAILO_TIMEOUT, "{} (D2H) failed with status={} (timeout={}ms)", + name(), HAILO_TIMEOUT, m_timeout.count()); + CHECK_EXPECTED(output); + + CHECK_AS_EXPECTED(output->data() == optional.data(), HAILO_INTERNAL_FAILURE, "The buffer received in {} was not the same as the user buffer!", name()); + return output; +} + +hailo_status UserBufferQueueElement::set_buffer_pool_buffer_size(uint32_t frame_size) +{ + return m_pool->set_buffer_size(frame_size); +} + +hailo_status UserBufferQueueElement::run_in_thread() +{ + auto optional = m_pool->acquire_buffer(INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == optional.status()) { + LOGGER__INFO("Shutdown event was signaled in dequeue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_EXPECTED_AS_STATUS(optional); + + auto buffer = next_pad().run_pull(optional.release()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { + LOGGER__INFO("Shutdown event was signaled in run_pull of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + if (HAILO_STREAM_ABORT == buffer.status()) { + LOGGER__INFO("run_pull of {} was aborted!", name()); + return HAILO_STREAM_ABORT; + } + CHECK_EXPECTED_AS_STATUS(buffer); + + hailo_status status = m_queue.enqueue(buffer.release(), INIFINITE_TIMEOUT()); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Shutdown event was signaled in enqueue of {}!", name()); + return HAILO_SHUTDOWN_EVENT_SIGNALED; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +std::vector UserBufferQueueElement::get_queue_size_accumulators() +{ + return std::vector(); // Since this element is sync, queue state will always be 0 +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp new file mode 100644 index 00000000..4f86dafa --- /dev/null +++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp @@ -0,0 +1,179 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file queue_elements.hpp
+ * @brief All queue elements in the pipeline.
+ **/
+
+#ifndef _HAILO_QUEUE_ELEMENTS_HPP_
+#define _HAILO_QUEUE_ELEMENTS_HPP_
+
+#include "net_flow/pipeline/pipeline_internal.hpp"
+
+namespace hailort
+{
+
+class BaseQueueElement : public IntermediateElement
+{
+public:
+    virtual ~BaseQueueElement();
+
+    hailo_status set_timeout(std::chrono::milliseconds timeout);
+    virtual std::string description() const override;
+
+    static constexpr auto INIFINITE_TIMEOUT() { return std::chrono::milliseconds(HAILO_INFINITE); }
+
+    virtual BufferPoolPtr get_buffer_pool() const override
+    {
+        return m_pool;
+    }
+
+protected:
+    static Expected<SpscQueue<PipelineBuffer>> create_queue(size_t queue_size, EventPtr shutdown_event);
+    BaseQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool,
+        EventPtr shutdown_event, const std::string &name,
+        std::chrono::milliseconds timeout, DurationCollector &&duration_collector,
+        AccumulatorPtr &&queue_size_accumulator, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+        Event &&activation_event, Event &&deactivation_event,
+        PipelineDirection pipeline_direction, std::shared_ptr<AsyncPipeline> async_pipeline);
+
+    hailo_status pipeline_status();
+
+    virtual hailo_status execute_activate() override;
+    virtual hailo_status execute_post_deactivate(bool should_clear_abort) override;
+    virtual hailo_status execute_clear() override;
+    virtual hailo_status execute_clear_abort() override;
+
+    /// Starts/stops the queue thread. These functions need to be called in the subclass's ctor and dtor
+    /// because otherwise, if the thread were started/stopped in this class, we would face a pure-virtual call
+    /// to `run_in_thread`.
+    /// These functions don't return a status because they are meant to be called from ctors and dtors
+    virtual void start_thread();
+    virtual void stop_thread();
+
+    virtual std::vector<AccumulatorPtr> get_queue_size_accumulators() override;
+
+    virtual hailo_status run_in_thread() = 0;
+    virtual std::string thread_name() = 0;
+
+    SpscQueue<PipelineBuffer> m_queue;
+    EventPtr m_shutdown_event;
+    std::chrono::milliseconds m_timeout;
+    std::thread m_thread;
+    std::atomic_bool m_is_thread_running;
+    Event m_activation_event;
+    Event m_deactivation_event;
+    AccumulatorPtr m_queue_size_accumulator;
+    BufferPoolPtr m_pool;
+};
+
+class PushQueueElement : public BaseQueueElement
+{
+public:
+    static Expected<std::shared_ptr<PushQueueElement>> create(const std::string &name, std::chrono::milliseconds timeout,
+        size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vs_flags,
+        std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    static Expected<std::shared_ptr<PushQueueElement>> create(const std::string &name, const hailo_vstream_params_t &vstream_params,
+        size_t frame_size, std::shared_ptr<std::atomic<hailo_status>> pipeline_status,
+        std::shared_ptr<AsyncPipeline> async_pipeline = nullptr);
+    PushQueueElement(SpscQueue<PipelineBuffer> &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name,
+        std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator,
+        std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, Event &&activation_event, Event &&deactivation_event,
+        std::shared_ptr<AsyncPipeline> async_pipeline, bool should_start_thread);
+    virtual ~PushQueueElement();
+
+    virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override;
+    virtual Expected<PipelineBuffer> run_pull(PipelineBuffer &&optional, const PipelinePad &source) override;
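The `start_thread` / `stop_thread` contract documented in `BaseQueueElement` above guards against a classic C++ pitfall: during a base-class constructor the object's dynamic type is still the base, so a thread launched there could invoke the pure-virtual `run_in_thread()`. A minimal illustration of the hazard and of the pattern the subclasses follow (not HailoRT code):

```cpp
struct Base {
    Base() {
        // Starting the worker thread here would be wrong: if it ran before the
        // derived constructor finished, run_in_thread() would resolve to the
        // pure-virtual Base::run_in_thread() -> undefined behavior.
    }
    virtual ~Base() = default;
    virtual void run_in_thread() = 0;

protected:
    void start_thread() { /* spawn a thread that repeatedly calls run_in_thread() */ }
    void stop_thread()  { /* signal the thread to exit and join it */ }
};

struct Derived : Base {
    Derived() { start_thread(); }           // safe: the vtable now points at Derived
    ~Derived() override { stop_thread(); }  // stop before Derived's members are destroyed
    void run_in_thread() override { /* actual work */ }
};
```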
virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PUSH_QUEUE"; }; + virtual hailo_status execute_abort() override; +}; + +class AsyncPushQueueElement : public PushQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, bool is_empty, bool interacts_with_hw, hailo_pipeline_elem_stats_flags_t flags, + hailo_vstream_stats_flags_t vstream_stats_flags, EventPtr shutdown_event, + std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline, bool is_entry = false); + static Expected> create(const std::string &name, const ElementBuildParams &build_params, + size_t frame_size, bool is_empty, bool interacts_with_hw, std::shared_ptr async_pipeline, bool is_entry = false); + AsyncPushQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event, + std::shared_ptr async_pipeline); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; + virtual Expected can_push_buffer_downstream() override; + +protected: + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "ASYNC_PUSH_Q"; }; + virtual void start_thread() override; + virtual hailo_status execute_terminate(hailo_status error_status); + virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; + virtual hailo_status execute_deactivate() override; +}; + +class PullQueueElement : public BaseQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + size_t queue_size, size_t frame_size, hailo_pipeline_elem_stats_flags_t flags, hailo_vstream_stats_flags_t vstream_stats_flags, + std::shared_ptr> pipeline_status); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status); + PullQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, EventPtr shutdown_event, const std::string &name, + std::chrono::milliseconds timeout, DurationCollector &&duration_collector, AccumulatorPtr &&queue_size_accumulator, + std::shared_ptr> &&pipeline_status, Event &&activation_event, Event &&deactivation_event); + virtual ~PullQueueElement(); + + virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + virtual PipelinePad &next_pad() override; + +protected: + virtual hailo_status execute_deactivate() override; + virtual hailo_status run_in_thread() override; + virtual std::string thread_name() override { return "PULL_QUEUE"; }; +}; + +class UserBufferQueueElement : public PullQueueElement +{ +public: + static Expected> create(const std::string &name, std::chrono::milliseconds timeout, + hailo_pipeline_elem_stats_flags_t flags, 
hailo_vstream_stats_flags_t vstream_stats_flags, + size_t frame_size, std::shared_ptr> pipeline_status); + static Expected> create(const std::string &name, const hailo_vstream_params_t &vstream_params, + size_t frame_size, std::shared_ptr> pipeline_status); + UserBufferQueueElement(SpscQueue &&queue, BufferPoolPtr buffer_pool, + EventPtr shutdown_event, const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, + AccumulatorPtr &&queue_size_accumulator, std::shared_ptr> &&pipeline_status, Event &&activation_event, + Event &&deactivation_event); + + virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; + hailo_status set_buffer_pool_buffer_size(uint32_t frame_size); + +virtual std::vector get_queue_size_accumulators() override; + +protected: + virtual hailo_status run_in_thread() override; +}; + + + + +} /* namespace hailort */ + +#endif /* _HAILO_QUEUE_ELEMENTS_HPP_ */ diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp index 8530e135..02a1c1f1 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp @@ -19,16 +19,6 @@ #include "hailo/hailort_common.hpp" #include "net_flow/pipeline/pipeline_internal.hpp" #include "stream_common/stream_internal.hpp" -#include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/ssd_post_process.hpp" -#include "net_flow/ops/yolox_post_process.hpp" -#include "net_flow/ops/yolov8_post_process.hpp" -#include "net_flow/ops/yolov5_post_process.hpp" -#include "net_flow/ops/argmax_post_process.hpp" -#include "net_flow/ops/softmax_post_process.hpp" -#include "net_flow/ops/yolov5_seg_post_process.hpp" - -#include "common/runtime_statistics_internal.hpp" #include "net_flow/pipeline/vstream_internal.hpp" #include @@ -52,2134 +42,711 @@ static std::map get_pipeline_accumulators_by_type( static std::map> get_pipeline_queue_size_accumulators( const std::vector> &pipeline); -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) +BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, + AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, + hailo_status &output_status) : + m_vstream_info(vstream_info), + m_quant_infos(quant_infos), + m_vstream_params(vstream_params), + m_measure_pipeline_latency((vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0), + m_entry_element(pipeline_entry), + m_pipeline(std::move(pipeline)), + m_is_activated(false), + m_is_aborted(false), + m_pipeline_status(std::move(pipeline_status)), + m_core_op_activated_event(std::move(core_op_activated_event)), + m_fps_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::FPS)), + 
m_latency_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::LATENCY)), + m_queue_size_accumulators(get_pipeline_queue_size_accumulators(m_pipeline)), + m_pipeline_latency_accumulator(pipeline_latency_accumulator) { - auto transform_context = InputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos); - CHECK_EXPECTED(transform_context, "Failed Creating InputTransformContext"); - - bool is_empty = false; - auto buffer_pool = BufferPool::create(transform_context.value()->get_dst_frame_size(), buffer_pool_size, shutdown_event, elem_flags, - vstream_flags, is_empty, is_dma_able); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto pre_infer_elem_ptr = make_shared_nothrow(transform_context.release(), - buffer_pool.release(), name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, - async_pipeline); - CHECK_AS_EXPECTED(nullptr != pre_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", pre_infer_elem_ptr->name()); - - return pre_infer_elem_ptr; + output_status = start_vstream(); } -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_dma_able, std::shared_ptr async_pipeline) +BaseVStream::BaseVStream(BaseVStream &&other) noexcept : + m_vstream_info(std::move(other.m_vstream_info)), + m_vstream_params(std::move(other.m_vstream_params)), + m_measure_pipeline_latency(std::move(other.m_measure_pipeline_latency)), + m_entry_element(std::move(other.m_entry_element)), + m_pipeline(std::move(other.m_pipeline)), + m_is_activated(std::exchange(other.m_is_activated, false)), + m_is_aborted(std::exchange(other.m_is_aborted, false)), + m_pipeline_status(std::move(other.m_pipeline_status)), + m_core_op_activated_event(std::move(other.m_core_op_activated_event)), + m_fps_accumulators(std::move(other.m_fps_accumulators)), + m_latency_accumulators(std::move(other.m_latency_accumulators)), + m_queue_size_accumulators(std::move(other.m_queue_size_accumulators)), + m_pipeline_latency_accumulator(std::move(other.m_pipeline_latency_accumulator)) +{} + +BaseVStream& BaseVStream::operator=(BaseVStream &&other) noexcept { - return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, - std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, - vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, is_dma_able, async_pipeline); + if (this != &other) { + // operator= is used only for vstream creation BEFORE activation. 
otherwise we should deactivate vstream here + assert(!m_is_activated); + m_vstream_info = std::move(other.m_vstream_info); + m_quant_infos = std::move(other.m_quant_infos); + m_vstream_params = std::move(other.m_vstream_params); + m_measure_pipeline_latency = std::move(other.m_measure_pipeline_latency); + m_entry_element = std::move(other.m_entry_element); + m_pipeline = std::move(other.m_pipeline); + m_is_activated = std::exchange(other.m_is_activated, false); + m_is_aborted = std::exchange(other.m_is_aborted, false); + m_pipeline_status = std::move(other.m_pipeline_status); + m_core_op_activated_event = std::move(other.m_core_op_activated_event); + m_fps_accumulators = std::move(other.m_fps_accumulators); + m_latency_accumulators = std::move(other.m_latency_accumulators); + m_queue_size_accumulators = std::move(other.m_queue_size_accumulators); + m_pipeline_latency_accumulator = std::move(other.m_pipeline_latency_accumulator); + } + return *this; } -Expected> PreInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_dma_able, - std::shared_ptr async_pipeline) +hailo_status BaseVStream::start_vstream() { - return PreInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, name, - build_params.timeout, build_params.buffer_pool_size_internal, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_dma_able, async_pipeline); -} + auto status = resume(); + CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, + "Failed to resume stream in {}", name()); -PreInferElement::PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_transform_context(std::move(transform_context)) -{} + LOGGER__DEBUG("Activating {}...", name()); + status = m_entry_element->activate(); + CHECK_SUCCESS(status); -Expected PreInferElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/) -{ - LOGGER__ERROR("PreInferElement does not support run_pull operation"); - return make_unexpected(HAILO_INVALID_OPERATION); + m_is_activated = true; + return HAILO_SUCCESS; } -PipelinePad &PreInferElement::next_pad() +hailo_status BaseVStream::abort() { - // Note: The next elem to be run is downstream from this elem (i.e. 
buffers are pushed) - return *m_sources[0].next(); -} + auto status = m_entry_element->abort(); + CHECK_SUCCESS(status); + m_is_aborted = true; -std::string PreInferElement::description() const -{ - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected PreInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status BaseVStream::resume() { - if (PipelineBuffer::Type::FLUSH == input.get_type()) { - return std::move(input); - } + auto status = m_entry_element->clear_abort(); + CHECK_SUCCESS(status); + m_is_aborted = false; - auto transformed_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == transformed_buffer.status()) { - return make_unexpected(transformed_buffer.status()); - } - - if (!transformed_buffer) { - input.get_exec_done_cb()(transformed_buffer.status()); + if (m_is_activated) { + status = m_entry_element->activate(); + CHECK_SUCCESS(status); } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != transformed_buffer.status(), HAILO_TIMEOUT, - "{} (H2D) failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(transformed_buffer); - - auto dst = transformed_buffer->as_view(); - m_duration_collector.start_measurement(); - const auto status = m_transform_context->transform(input.as_view(), dst); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - transformed_buffer->set_action_status(status); - - auto metadata = input.get_metadata(); - - CHECK_SUCCESS_AS_EXPECTED(status); + return HAILO_SUCCESS; +} - // Note: The latency to be measured starts as the input buffer is sent to the InputVStream (via write()) - transformed_buffer->set_metadata(std::move(metadata)); +hailo_status BaseVStream::stop_vstream() +{ + hailo_status status = HAILO_SUCCESS; + if (m_is_activated) { + m_is_activated = false; + status = m_entry_element->deactivate(); + if (HAILO_SUCCESS != status) { + LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status); + } - return transformed_buffer.release(); + // If VStream was aborted, do not clear low-level stream abortion, + // otherwise flush would be called on low-level stream d-tor when there is no receiver. 
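+        // Illustration of the rule above (descriptive comment only, no behavior change):
+        //   abort()  sets m_is_aborted = true  -> should_clear_abort == false,
+        //            so the low-level stream keeps its aborted state across deactivation;
+        //   resume() sets m_is_aborted = false -> should_clear_abort == true.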
+        auto should_clear_abort = (!m_is_aborted);
+        status = m_entry_element->post_deactivate(should_clear_abort);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__WARNING("Failed post deactivate of vstream {}, status {}", name(), status);
+        }
+    }
+    return status;
 }
 
-Expected> ConvertNmsToDetectionsElement::create(
-    const hailo_nms_info_t &nms_info, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
-    std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout,
-    hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size,
-    PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline)
+hailo_status BaseVStream::stop_and_clear()
 {
-    // The actual data will be in the metadata
-    auto frame_size = 0;
-    auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element);
-    CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name);
-    auto buffer_pool = buffer_pool_expected.release();
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    auto status = HAILO_SUCCESS;
+    if (nullptr != m_core_op_activated_event) {
+        status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
+        CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION,
+            "Trying to clear {} vstream before its network group is deactivated", name());
+    }
 
-    auto convert_nms_to_detections_elem_ptr = make_shared_nothrow(std::move(nms_info),
-        name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline);
-    CHECK_AS_EXPECTED(nullptr != convert_nms_to_detections_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+    status = stop_vstream();
+    CHECK_SUCCESS(status);
 
-    LOGGER__INFO("Created {}", convert_nms_to_detections_elem_ptr->name());
+    status = m_entry_element->clear();
+    CHECK_SUCCESS(status, "Failed clearing vstream {}", name());
 
-    return convert_nms_to_detections_elem_ptr;
-}
+    const auto curr_pipeline_status = m_pipeline_status->load();
+    if (HAILO_SUCCESS != curr_pipeline_status) {
+        LOGGER__TRACE("Overwriting current pipeline status {}", curr_pipeline_status);
+        m_pipeline_status->store(HAILO_SUCCESS);
+    }
 
-Expected> ConvertNmsToDetectionsElement::create(
-    const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params,
-    PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline)
-{
-    return ConvertNmsToDetectionsElement::create(nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status,
-        build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges,
-        pipeline_direction, is_last_copy_element, async_pipeline);
+    return status;
 }
 
-ConvertNmsToDetectionsElement::ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name,
-    DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool,
-    std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) :
-    FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline),
-    m_nms_info(std::move(nms_info))
-{}
-
-hailo_status ConvertNmsToDetectionsElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink)
+hailo_status BaseVStream::before_fork()
 {
-
CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "ConvertNmsToDetectionsElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return HAILO_SUCCESS; } -PipelinePad &ConvertNmsToDetectionsElement::next_pad() +hailo_status BaseVStream::after_fork_in_parent() { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return HAILO_SUCCESS; } -std::string ConvertNmsToDetectionsElement::description() const +hailo_status BaseVStream::after_fork_in_child() { - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected ConvertNmsToDetectionsElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +size_t BaseVStream::get_frame_size() const { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - - buffer->set_metadata(input.get_metadata()); - - m_duration_collector.start_measurement(); - - auto detections_pair = net_flow::NmsPostProcessOp::transform__d2h_NMS_DETECTIONS(input.data(), m_nms_info); - auto detections_pipeline_data = make_shared_nothrow - (std::move(detections_pair.first),std::move(detections_pair.second)); - buffer->set_additional_data(detections_pipeline_data); - - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer.release(); + return HailoRTCommon::get_frame_size(m_vstream_info, m_vstream_params.user_buffer_format); } -Expected> FillNmsFormatElement::create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_vstream_info_t &BaseVStream::get_info() const { - auto frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format); - auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name); - auto buffer_pool = buffer_pool_expected.release(); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", fill_nms_format_element->name()); - - return fill_nms_format_element; + return m_vstream_info; } -Expected> FillNmsFormatElement::create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams 
&build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const std::vector &BaseVStream::get_quant_infos() const { - return FillNmsFormatElement::create(nms_info, dst_format, nms_config, name, build_params.elem_stats_flags, - build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, - async_pipeline); + return m_quant_infos; } -FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_nms_config(std::move(nms_config)) -{} - -hailo_status FillNmsFormatElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +const hailo_format_t &BaseVStream::get_user_buffer_format() const { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "FillNmsFormatElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream_params.user_buffer_format; } -PipelinePad &FillNmsFormatElement::next_pad() +std::string BaseVStream::name() const { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return std::string(m_vstream_info.name); } -std::string FillNmsFormatElement::description() const +std::string BaseVStream::network_name() const { - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return std::string(m_vstream_info.network_name); } -Expected FillNmsFormatElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +const std::map &BaseVStream::get_fps_accumulators() const { - auto buffer_expected = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer_expected.status()) { - return make_unexpected(buffer_expected.status()); - } - - if (!buffer_expected) { - input.get_exec_done_cb()(buffer_expected.status()); - } - CHECK_EXPECTED(buffer_expected, "{} (D2H) failed with status={}", name(), buffer_expected.status()); - auto buffer = buffer_expected.release(); - - buffer.set_metadata(input.get_metadata()); - - m_duration_collector.start_measurement(); - - auto detections = input.get_metadata().get_additional_data(); - auto dst = buffer.as_view(); - net_flow::NmsPostProcessOp::fill_nms_format_buffer(dst, detections->m_detections, detections->m_detections_classes_count, - m_nms_config); - - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer; + return m_fps_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, 
hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, size_t buffer_pool_size, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +const std::map &BaseVStream::get_latency_accumulators() const { - auto frame_size = (dst_format.order == HAILO_FORMAT_ORDER_HAILO_NMS) ? HailoRTCommon::get_nms_host_frame_size(nms_info, dst_format) : HailoRTCommon::get_frame_size(dst_image_shape, dst_format); - auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name); - - auto transform_context = OutputTransformContext::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos, nms_info); - CHECK_EXPECTED(transform_context, "Failed Creating OutputTransformContext"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto post_infer_elem_ptr = make_shared_nothrow(transform_context.release(), name, - duration_collector.release(), std::move(pipeline_status), buffer_pool_expected.release(), timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != post_infer_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", post_infer_elem_ptr->name()); - - return post_infer_elem_ptr; + return m_latency_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +const std::map> &BaseVStream::get_queue_size_accumulators() const { - return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, dst_quant_infos, nms_info, - name, vstream_params.pipeline_elements_stats_flags, pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.vstream_stats_flags, shutdown_event, vstream_params.queue_size, pipeline_direction, is_last_copy_element, async_pipeline); + return m_queue_size_accumulators; } -Expected> PostInferElement::create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +AccumulatorPtr BaseVStream::get_pipeline_latency_accumulator() const { - return PostInferElement::create(src_image_shape, src_format, dst_image_shape, dst_format, - dst_quant_infos, nms_info, name, build_params.elem_stats_flags, build_params.pipeline_status, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, build_params.buffer_pool_size_edges, - pipeline_direction, is_last_copy_element, async_pipeline); + return m_pipeline_latency_accumulator; } -PostInferElement::PostInferElement(std::unique_ptr &&transform_context, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - 
BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_transform_context(std::move(transform_context)) -{} -Expected PostInferElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +const std::vector> &BaseVStream::get_pipeline() const { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_pull operation", name() - ); - return FilterElement::run_pull(std::move(optional), source); + return m_pipeline; } -hailo_status PostInferElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +Expected InputVStream::create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, + const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "PostInferElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); -} + auto vstream_internal = InputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); + CHECK_EXPECTED(vstream_internal); -PipelinePad &PostInferElement::next_pad() -{ - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + InputVStream vstream(vstream_internal.release()); + return vstream; } -std::string PostInferElement::description() const +hailo_status InputVStream::write(const MemoryView &buffer) { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_transform_context->description() << ")"; - return element_description.str(); + return m_vstream->write(std::move(buffer)); } -Expected PostInferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status InputVStream::write(const hailo_pix_buffer_t &buffer) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); + // If only one plane is passed, address it as memview + if (1 == buffer.number_of_planes) { + return write(MemoryView(buffer.planes[0].user_ptr, buffer.planes[0].bytes_used)); } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - // Note: The latency to be measured starts as the buffer is read from the HW (it's 'input' in this case) - buffer->set_metadata(input.get_metadata()); - - auto dst = buffer->as_view(); - m_duration_collector.start_measurement(); - const auto status = m_transform_context->transform(input.as_view(), dst); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - buffer->set_action_status(status); + // If model is multi 
planar, pass the pix buffer
+    if (m_vstream->is_multi_planar()){
+        return m_vstream->write(buffer);
+    }
 
-    CHECK_SUCCESS_AS_EXPECTED(status);
+    // Other cases - allocate a contiguous buffer to hold all planes
+    bool is_contiguous = true;
+    uint32_t planes_total_size = 0;
+    /* assuming contiguous memory. If not, this will be overridden by the coming loop */
+    void *data_ptr = buffer.planes[0].user_ptr;
 
-    return buffer.release();
-}
+    /* calculate total data size by summing the planes' sizes and check if the planes are contiguous */
+    for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){
+        auto &plane = buffer.planes[plane_index];
+        planes_total_size += plane.bytes_used;
 
-static hailo_nms_info_t fuse_nms_info(const std::vector &nms_infos)
-{
-    hailo_nms_info_t fused_info = nms_infos[0];
-    fused_info.is_defused = false;
-    fused_info.number_of_classes = 0;
-    for (const auto &nms_info : nms_infos) {
-        fused_info.number_of_classes += nms_info.number_of_classes;
-        assert(nms_infos[0].max_bboxes_per_class == nms_info.max_bboxes_per_class);
-        assert(nms_infos[0].bbox_size == nms_info.bbox_size);
-        assert(nms_infos[0].chunks_per_frame == nms_info.chunks_per_frame);
-        assert(nms_infos[0].burst_size == nms_info.burst_size);
-        assert(nms_infos[0].burst_type == nms_info.burst_type);
+        if (is_contiguous && (plane_index + 1 < buffer.number_of_planes)){
+            auto &next_plane = buffer.planes[plane_index+1];
+            if ((static_cast(plane.user_ptr) + plane.bytes_used) != next_plane.user_ptr){
+                is_contiguous = false;
+            }
+        }
     }
 
-    return fused_info;
-}
 
-Expected> RemoveOverlappingBboxesElement::create(
-    const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags,
-    std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags,
-    EventPtr shutdown_event, size_t buffer_pool_size, PipelineDirection pipeline_direction, bool is_last_copy_element,
-    std::shared_ptr async_pipeline)
-{
-    // The actual data will be in the metadata
-    auto frame_size = 0;
-    auto buffer_pool_expected = BufferPool::create(frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element);
-    CHECK_EXPECTED(buffer_pool_expected, "Failed creating BufferPool for {}", name);
-    auto buffer_pool = buffer_pool_expected.release();
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    BufferPtr contiguous_buffer = nullptr;
+    if (!
is_contiguous) { + /* copy to a contiguous buffer, and then pass it */ + auto expected_buffer = Buffer::create_shared(planes_total_size); + CHECK_EXPECTED_AS_STATUS(expected_buffer); + contiguous_buffer = expected_buffer.release(); + uint32_t copied_bytes = 0; - auto convert_nms_removed_overlapping_elem_ptr = make_shared_nothrow(std::move(nms_config), - name, duration_collector.release(), std::move(pipeline_status), buffer_pool, timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != convert_nms_removed_overlapping_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ + auto &plane = buffer.planes[plane_index]; + std::memcpy(contiguous_buffer->data() + copied_bytes, plane.user_ptr, plane.bytes_used); + copied_bytes += plane.bytes_used; + } - LOGGER__INFO("Created {}", convert_nms_removed_overlapping_elem_ptr->name()); + data_ptr = contiguous_buffer->data(); + } - return convert_nms_removed_overlapping_elem_ptr; + return m_vstream->write(std::move(MemoryView(data_ptr, planes_total_size))); } -Expected> RemoveOverlappingBboxesElement::create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::flush() { - return RemoveOverlappingBboxesElement::create(nms_config, name, - build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.buffer_pool_size_edges, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->flush(); } -RemoveOverlappingBboxesElement::RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_nms_config(std::move(nms_config)) -{} - -hailo_status RemoveOverlappingBboxesElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) -{ - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "RemoveOverlappingBboxesElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); -} - -PipelinePad &RemoveOverlappingBboxesElement::next_pad() +hailo_status InputVStream::clear(std::vector &vstreams) { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); + for (auto &vstream : vstreams) { + auto status = vstream.stop_and_clear(); + CHECK_SUCCESS(status); + } + for (auto &vstream : vstreams) { + auto status = vstream.start_vstream(); + CHECK_SUCCESS(status); } - return *m_sinks[0].prev(); -} -std::string RemoveOverlappingBboxesElement::description() const -{ - std::stringstream element_description; - element_description << "(" << this->name() << ")"; - return element_description.str(); + return HAILO_SUCCESS; } -Expected RemoveOverlappingBboxesElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status InputVStream::clear(std::vector> &vstreams) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if 
(HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().stop_and_clear(); + CHECK_SUCCESS(status); } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().start_vstream(); + CHECK_SUCCESS(status); } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); - - buffer->set_metadata(input.get_metadata()); - m_duration_collector.start_measurement(); - auto detections_pipeline_data = input.get_metadata().get_additional_data(); - - net_flow::NmsPostProcessOp::remove_overlapping_boxes(detections_pipeline_data->m_detections, - detections_pipeline_data->m_detections_classes_count, m_nms_config.nms_iou_th); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(HAILO_SUCCESS); - - return buffer.release(); + return HAILO_SUCCESS; } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::abort() { - assert(nms_op->outputs_metadata().size() == 1); - auto vstream_info = nms_op->metadata()->get_output_vstream_info(); - CHECK_EXPECTED(vstream_info); - - auto buffer_size = HailoRTCommon::get_nms_host_frame_size(nms_op->metadata()->get_output_vstream_info()->nms_shape, - nms_op->outputs_metadata().begin()->second.format); - - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto nms_elem_ptr = make_shared_nothrow(nms_op, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", nms_elem_ptr->name()); - return nms_elem_ptr; + return m_vstream->abort(); } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status InputVStream::resume() { - return NmsPostProcessMuxElement::create(nms_op, name, build_params.timeout, - build_params.buffer_pool_size_edges, build_params.elem_stats_flags, build_params.vstream_stats_flags, - build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->resume(); } -Expected> NmsPostProcessMuxElement::create(std::shared_ptr nms_op, - const std::string &name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +size_t InputVStream::get_frame_size() const { - return NmsPostProcessMuxElement::create(nms_op, name, std::chrono::milliseconds(vstream_params.timeout_ms), - vstream_params.queue_size, 
vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, - pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->get_frame_size(); } -NmsPostProcessMuxElement::NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, - const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - BaseMuxElement(nms_op->inputs_metadata().size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), - std::move(pool), pipeline_direction, async_pipeline), - m_nms_op(nms_op) -{} - -std::vector NmsPostProcessMuxElement::get_queue_size_accumulators() +const hailo_vstream_info_t &InputVStream::get_info() const { - if (nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; + return m_vstream->get_info(); } -Expected NmsPostProcessMuxElement::action(std::vector &&input_buffers, PipelineBuffer &&optional) +const std::vector &InputVStream::get_quant_infos() const { - std::map inputs; - std::map outputs; - for (size_t i = 0; i < input_buffers.size(); ++i) { - inputs.insert({m_sinks_names[i], input_buffers[i].as_view()}); - } - auto acquired_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } - - if (!acquired_buffer) { - for (auto &input : input_buffers) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(acquired_buffer.status()); - } - } - CHECK_EXPECTED(acquired_buffer); - outputs.insert({"", acquired_buffer->as_view()}); // TODO: fill with correct name - m_duration_collector.start_measurement(); - - auto post_process_result = m_nms_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); - - for (auto &input : input_buffers) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - } - acquired_buffer->set_action_status(post_process_result); - - CHECK_SUCCESS_AS_EXPECTED(post_process_result); - return acquired_buffer; + return m_vstream->get_quant_infos(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_format_t &InputVStream::get_user_buffer_format() const { - const auto &fused_info = fuse_nms_info(nms_infos); - auto buffer_pool = BufferPool::create(HailoRTCommon::get_nms_hw_frame_size(fused_info), - buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto nms_elem_ptr = make_shared_nothrow(nms_infos, fused_info, buffer_pool.release(), - name, timeout, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != nms_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", nms_elem_ptr->name()); - - return nms_elem_ptr; + return 
m_vstream->get_user_buffer_format(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +std::string InputVStream::name() const { - return NmsMuxElement::create(nms_infos, name, std::chrono::milliseconds(vstream_params.timeout_ms), vstream_params.queue_size, - vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags, shutdown_event, pipeline_status, pipeline_direction, - is_last_copy_element, async_pipeline); + return m_vstream->name(); } -Expected> NmsMuxElement::create(const std::vector &nms_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +std::string InputVStream::network_name() const { - return NmsMuxElement::create(nms_infos, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, - async_pipeline); + return m_vstream->network_name(); } -NmsMuxElement::NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - BaseMuxElement(nms_infos.size(), name, timeout, std::move(duration_collector), std::move(pipeline_status), std::move(pool), pipeline_direction, async_pipeline), - m_nms_infos(nms_infos), - m_fused_nms_info(fused_nms_info) -{} - -const hailo_nms_info_t &NmsMuxElement::get_fused_nms_info() const +const std::map &InputVStream::get_fps_accumulators() const { - return m_fused_nms_info; + return m_vstream->get_fps_accumulators(); } -std::vector NmsMuxElement::get_queue_size_accumulators() +const std::map &InputVStream::get_latency_accumulators() const { - if (nullptr == m_pool->get_queue_size_accumulator()) { - return std::vector(); - } - return {m_pool->get_queue_size_accumulator()}; + return m_vstream->get_latency_accumulators(); } -Expected NmsMuxElement::action(std::vector &&inputs, PipelineBuffer &&optional) +const std::map> &InputVStream::get_queue_size_accumulators() const { - std::vector input_views; + return m_vstream->get_queue_size_accumulators(); +} - input_views.reserve(inputs.size()); - for (auto &input_buf : inputs) { - input_views.push_back(input_buf.as_view()); - } +AccumulatorPtr InputVStream::get_pipeline_latency_accumulator() const +{ + return m_vstream->get_pipeline_latency_accumulator(); +} - auto acquired_buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } +const std::vector> &InputVStream::get_pipeline() const +{ + return m_vstream->get_pipeline(); +} - if (!acquired_buffer) { - for (auto &input : inputs) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(acquired_buffer.status()); - } - } - CHECK_AS_EXPECTED(HAILO_TIMEOUT != acquired_buffer.status(), HAILO_TIMEOUT, - "{} failed with status={} (timeout={}ms)", name(), HAILO_TIMEOUT, m_timeout.count()); - CHECK_EXPECTED(acquired_buffer); - - 
m_duration_collector.start_measurement(); - const auto status = fuse_buffers(input_views, m_nms_infos, acquired_buffer.value().as_view()); - m_duration_collector.complete_measurement(); - - for (auto &input : inputs) { - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - } - acquired_buffer->set_action_status(status); +hailo_status InputVStream::start_vstream() +{ + return m_vstream->start_vstream(); +} - CHECK_SUCCESS_AS_EXPECTED(status); +hailo_status InputVStream::stop_vstream() +{ + return m_vstream->stop_vstream(); +} - return acquired_buffer.release(); +hailo_status InputVStream::stop_and_clear() +{ + return m_vstream->stop_and_clear(); } -Expected> TransformDemuxElement::create(std::shared_ptr demuxer, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +std::string InputVStream::get_pipeline_description() const { - std::vector pools; - pools.reserve(demuxer->get_edges_stream_info().size()); - for (const auto& mux_edge : demuxer->get_edges_stream_info()) { - auto buffer_pool = BufferPool::create(mux_edge.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool"); - pools.push_back(buffer_pool.release()); - } + return m_vstream->get_pipeline_description(); +} - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); +bool InputVStream::is_aborted() +{ + return m_vstream->is_aborted(); +} +bool InputVStream::is_multi_planar() +{ + return m_vstream->is_multi_planar(); +} - auto demux_elem_ptr = make_shared_nothrow(demuxer, std::move(pools), name, timeout, - duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != demux_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - return demux_elem_ptr; +hailo_status InputVStream::before_fork() +{ + return m_vstream->before_fork(); } -Expected> TransformDemuxElement::create(std::shared_ptr demuxer, - const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +hailo_status InputVStream::after_fork_in_parent() { - return TransformDemuxElement::create(demuxer, name, build_params.timeout, build_params.buffer_pool_size_edges, build_params.elem_stats_flags, - build_params.vstream_stats_flags, build_params.shutdown_event, build_params.pipeline_status, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->after_fork_in_parent(); } -TransformDemuxElement::TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - BaseDemuxElement(demuxer->get_edges_stream_info().size(), name, timeout, std::move(duration_collector), - std::move(pipeline_status), std::move(pools), pipeline_direction, async_pipeline), - m_demuxer(demuxer) -{} - -std::vector TransformDemuxElement::get_queue_size_accumulators() +hailo_status InputVStream::after_fork_in_child() { - std::vector result; - for (const auto &pool : m_pools) { - if 
(nullptr != pool->get_queue_size_accumulator()) { - result.emplace_back(pool->get_queue_size_accumulator()); - } - } - return result; + return m_vstream->after_fork_in_child(); } -Expected> TransformDemuxElement::action(PipelineBuffer &&input) +InputVStream::InputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} + +Expected OutputVStream::create( + const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, + EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) { - std::vector outputs; - std::vector raw_buffers; + auto vstream_internal = OutputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); + CHECK_EXPECTED(vstream_internal); - auto mux_edges = m_demuxer->get_edges_stream_info(); - outputs.reserve(mux_edges.size()); - raw_buffers.reserve(mux_edges.size()); + OutputVStream vstream(vstream_internal.release()); + return vstream; +} - for (uint32_t i = 0; i < mux_edges.size(); i++) { - auto acquired_buffer = m_pools[i]->acquire_buffer(m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == acquired_buffer.status()) { - return make_unexpected(acquired_buffer.status()); - } +hailo_status OutputVStream::read(MemoryView buffer) +{ + return m_vstream->read(std::move(buffer)); +} - if (!acquired_buffer) { - input.get_exec_done_cb()(acquired_buffer.status()); - } - CHECK_EXPECTED(acquired_buffer, "Failed to acquire buffer"); - outputs.emplace_back(acquired_buffer.release()); - raw_buffers.push_back(outputs.back().as_view()); +hailo_status OutputVStream::clear(std::vector &vstreams) +{ + for (auto &vstream : vstreams) { + auto status = vstream.stop_and_clear(); + CHECK_SUCCESS(status); } - - m_duration_collector.start_measurement(); - const auto status = m_demuxer->transform_demux(input.as_view(), raw_buffers); - m_duration_collector.complete_measurement(); - - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(status); - for (auto &output : outputs) { - output.set_action_status(status); + for (auto &vstream : vstreams) { + auto status = vstream.start_vstream(); + CHECK_SUCCESS(status); } - CHECK_SUCCESS_AS_EXPECTED(status); - - return outputs; + return HAILO_SUCCESS; } -PixBufferElement::PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - hailo_format_order_t order, std::shared_ptr async_pipeline) : - BaseDemuxElement(((order == HAILO_FORMAT_ORDER_I420) ? 
NUMBER_OF_PLANES_I420 : NUMBER_OF_PLANES_NV12_NV21), - name, timeout, std::move(duration_collector), std::move(pipeline_status), - {}, PipelineDirection::PUSH, async_pipeline), - m_order(order) -{} - -Expected PixBufferElement::can_push_buffer_upstream(const std::string &pad_name) +hailo_status OutputVStream::abort() { - return m_sinks[0].prev()->element().can_push_buffer_upstream(pad_name); + return m_vstream->abort(); } -Expected> PixBufferElement::create(const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::resume() { - auto pix_buffer_splitter_elem_ptr = make_shared_nothrow(name, timeout, - std::move(duration_collector), std::move(pipeline_status), order, async_pipeline); - CHECK_AS_EXPECTED(nullptr != pix_buffer_splitter_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - return pix_buffer_splitter_elem_ptr; + return m_vstream->resume(); } -Expected> PixBufferElement::action(PipelineBuffer &&input) +hailo_status OutputVStream::clear(std::vector> &vstreams) { - // splits the planes into buffers - m_duration_collector.start_measurement(); - std::vector outputs; - - auto input_pix_buffer_expected = input.as_hailo_pix_buffer(m_order); - - if (!input_pix_buffer_expected) { - input.get_exec_done_cb()(input_pix_buffer_expected.status()); + for (auto &vstream : vstreams) { + auto status = vstream.get().stop_and_clear(); + CHECK_SUCCESS(status); } - CHECK_EXPECTED(input_pix_buffer_expected); - auto input_pix_buffer = input_pix_buffer_expected.release(); - - if (PipelineBuffer::Type::FLUSH == input.get_type()) { - for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { - outputs.emplace_back(PipelineBuffer(PipelineBuffer::Type::FLUSH)); - } - } else { - auto shared_counter = make_shared_nothrow(input_pix_buffer.number_of_planes); - if (!shared_counter) { - input.get_exec_done_cb()(HAILO_OUT_OF_HOST_MEMORY); - } - CHECK_NOT_NULL_AS_EXPECTED(shared_counter, HAILO_OUT_OF_HOST_MEMORY); - - for (uint32_t i = 0; i < input_pix_buffer.number_of_planes; i++) { - outputs.emplace_back(MemoryView(input_pix_buffer.planes[i].user_ptr, input_pix_buffer.planes[i].bytes_used), - [shared_counter, input_cb = input.get_exec_done_cb()](hailo_status status) - { - if (--*shared_counter == 0) { - input_cb(status); - } - }); - } + for (auto &vstream : vstreams) { + auto status = vstream.get().start_vstream(); + CHECK_SUCCESS(status); } - m_duration_collector.complete_measurement(); - return outputs; + return HAILO_SUCCESS; } -Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, - size_t buffer_pool_size, std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, - EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +size_t OutputVStream::get_frame_size() const { - auto out_metadata = argmax_op->outputs_metadata().begin()->second; - auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - 
auto argmax_elem_ptr = make_shared_nothrow(argmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != argmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - LOGGER__INFO("Created {}", argmax_elem_ptr->name()); - return argmax_elem_ptr; + return m_vstream->get_frame_size(); } -Expected> ArgmaxPostProcessElement::create(std::shared_ptr argmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +const hailo_vstream_info_t &OutputVStream::get_info() const { - return ArgmaxPostProcessElement::create(argmax_op, name, - build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, build_params.timeout, - build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->get_info(); } -ArgmaxPostProcessElement::ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, buffer_pool, timeout, async_pipeline), - m_argmax_op(argmax_op) -{} - -Expected ArgmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +const std::vector &OutputVStream::get_quant_infos() const { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "ArgmaxPostProcessElement {} does not support run_pull operation", name()); - return FilterElement::run_pull(std::move(optional), source); + return m_vstream->get_quant_infos(); } -hailo_status ArgmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +const hailo_format_t &OutputVStream::get_user_buffer_format() const { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "ArgmaxPostProcessElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream->get_user_buffer_format(); } -PipelinePad &ArgmaxPostProcessElement::next_pad() +std::string OutputVStream::name() const { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return m_vstream->name(); } -std::string ArgmaxPostProcessElement::description() const +std::string OutputVStream::network_name() const { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_argmax_op->metadata()->get_op_description() << ")"; - return element_description.str(); + return m_vstream->network_name(); } -Expected ArgmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +const std::map &OutputVStream::get_fps_accumulators() const { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } - - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); + return m_vstream->get_fps_accumulators(); +} - std::map inputs; - std::map outputs; - auto 
&input_name = m_argmax_op->inputs_metadata().begin()->first; - auto &output_name = m_argmax_op->outputs_metadata().begin()->first; - inputs.insert({input_name, input.as_view()}); - outputs.insert({output_name, buffer->as_view()}); - m_duration_collector.start_measurement(); - auto post_process_result = m_argmax_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); +const std::map &OutputVStream::get_latency_accumulators() const +{ + return m_vstream->get_latency_accumulators(); +} - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - buffer->set_action_status(post_process_result); +const std::map> &OutputVStream::get_queue_size_accumulators() const +{ + return m_vstream->get_queue_size_accumulators(); +} - CHECK_SUCCESS_AS_EXPECTED(post_process_result); +AccumulatorPtr OutputVStream::get_pipeline_latency_accumulator() const +{ + return m_vstream->get_pipeline_latency_accumulator(); +} - return buffer.release(); +const std::vector> &OutputVStream::get_pipeline() const +{ + return m_vstream->get_pipeline(); } -Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::start_vstream() { - auto out_metadata = softmax_op->outputs_metadata().begin()->second; - auto buffer_size = HailoRTCommon::get_frame_size(out_metadata.shape, out_metadata.format); - auto buffer_pool = BufferPool::create(buffer_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, is_last_copy_element); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); + return m_vstream->start_vstream(); +} - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - auto softmax_elem_ptr = make_shared_nothrow(softmax_op, - name, duration_collector.release(), std::move(pipeline_status), timeout, buffer_pool.release(), pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != softmax_elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - LOGGER__INFO("Created {}", softmax_elem_ptr->name()); - return softmax_elem_ptr; +hailo_status OutputVStream::stop_vstream() +{ + return m_vstream->stop_vstream(); } -Expected> SoftmaxPostProcessElement::create(std::shared_ptr softmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction, bool is_last_copy_element, - std::shared_ptr async_pipeline) +hailo_status OutputVStream::stop_and_clear() { - return SoftmaxPostProcessElement::create(softmax_op, name, build_params.elem_stats_flags, build_params.pipeline_status, build_params.buffer_pool_size_edges, - build_params.timeout, build_params.vstream_stats_flags, build_params.shutdown_event, pipeline_direction, is_last_copy_element, async_pipeline); + return m_vstream->stop_and_clear(); } -SoftmaxPostProcessElement::SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, 
buffer_pool, timeout, async_pipeline), - m_softmax_op(softmax_op) -{} +std::string OutputVStream::get_pipeline_description() const +{ + return m_vstream->get_pipeline_description(); +} -Expected SoftmaxPostProcessElement::run_pull(PipelineBuffer &&optional, const PipelinePad &source) +bool OutputVStream::is_aborted() { - CHECK_AS_EXPECTED(m_pipeline_direction == PipelineDirection::PULL, HAILO_INVALID_OPERATION, - "SoftmaxPostProcessElement {} does not support run_pull operation", name()); - return FilterElement::run_pull(std::move(optional), source); + return m_vstream->is_aborted(); } -hailo_status SoftmaxPostProcessElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) +hailo_status OutputVStream::before_fork() { - CHECK(PipelineDirection::PUSH == m_pipeline_direction, HAILO_INVALID_OPERATION, - "SoftmaxPostProcessElement {} does not support run_push operation", name()); - return FilterElement::run_push(std::move(buffer), sink); + return m_vstream->before_fork(); } -PipelinePad &SoftmaxPostProcessElement::next_pad() +hailo_status OutputVStream::after_fork_in_parent() { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + return m_vstream->after_fork_in_parent(); } -std::string SoftmaxPostProcessElement::description() const +hailo_status OutputVStream::after_fork_in_child() { - std::stringstream element_description; - element_description << "(" << this->name() << " | " << m_softmax_op->metadata()->get_op_description() << ")"; - return element_description.str(); + return m_vstream->after_fork_in_child(); } -Expected SoftmaxPostProcessElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status OutputVStream::set_nms_score_threshold(float32_t threshold) { - auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) { - return make_unexpected(buffer.status()); - } + return m_vstream->set_nms_score_threshold(threshold); +} - if (!buffer) { - input.get_exec_done_cb()(buffer.status()); - } - CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status()); +hailo_status OutputVStream::set_nms_iou_threshold(float32_t threshold) +{ + return m_vstream->set_nms_iou_threshold(threshold); +} - std::map inputs; - std::map outputs; - auto &input_name = m_softmax_op->inputs_metadata().begin()->first; - auto &output_name = m_softmax_op->outputs_metadata().begin()->first; - inputs.insert({input_name, input.as_view()}); - outputs.insert({output_name, buffer->as_view()}); - m_duration_collector.start_measurement(); - auto post_process_result = m_softmax_op->execute(inputs, outputs); - m_duration_collector.complete_measurement(); +hailo_status OutputVStream::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) +{ + return m_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); +} - auto exec_done_cb = input.get_exec_done_cb(); - exec_done_cb(post_process_result); - buffer->set_action_status(post_process_result); +hailo_status OutputVStream::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) +{ + return m_vstream->set_nms_max_accumulated_mask_size(max_accumulated_mask_size); +} - CHECK_SUCCESS_AS_EXPECTED(post_process_result); +OutputVStream::OutputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - return buffer.release(); -} +std::map get_pipeline_accumulators_by_type( + const std::vector> &pipeline, AccumulatorType accumulator_type) +{ + std::map result; + 
for (const auto &elem : pipeline) { + if (nullptr == elem) { + continue; + } -BaseVStream::BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, - hailo_status &output_status) : - m_vstream_info(vstream_info), - m_quant_infos(quant_infos), - m_vstream_params(vstream_params), - m_measure_pipeline_latency((vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0), - m_entry_element(pipeline_entry), - m_pipeline(std::move(pipeline)), - m_is_activated(false), - m_is_aborted(false), - m_pipeline_status(std::move(pipeline_status)), - m_shutdown_event(shutdown_event), - m_core_op_activated_event(std::move(core_op_activated_event)), - m_fps_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::FPS)), - m_latency_accumulators(get_pipeline_accumulators_by_type(m_pipeline, AccumulatorType::LATENCY)), - m_queue_size_accumulators(get_pipeline_queue_size_accumulators(m_pipeline)), - m_pipeline_latency_accumulator(pipeline_latency_accumulator) -{ - output_status = start_vstream(); -} - -BaseVStream::BaseVStream(BaseVStream &&other) noexcept : - m_vstream_info(std::move(other.m_vstream_info)), - m_vstream_params(std::move(other.m_vstream_params)), - m_measure_pipeline_latency(std::move(other.m_measure_pipeline_latency)), - m_entry_element(std::move(other.m_entry_element)), - m_pipeline(std::move(other.m_pipeline)), - m_is_activated(std::exchange(other.m_is_activated, false)), - m_is_aborted(std::exchange(other.m_is_aborted, false)), - m_pipeline_status(std::move(other.m_pipeline_status)), - m_shutdown_event(std::move(other.m_shutdown_event)), - m_core_op_activated_event(std::move(other.m_core_op_activated_event)), - m_fps_accumulators(std::move(other.m_fps_accumulators)), - m_latency_accumulators(std::move(other.m_latency_accumulators)), - m_queue_size_accumulators(std::move(other.m_queue_size_accumulators)), - m_pipeline_latency_accumulator(std::move(other.m_pipeline_latency_accumulator)) -{} - -BaseVStream& BaseVStream::operator=(BaseVStream &&other) noexcept -{ - if (this != &other) { - // operator= is used only for vstream creation BEFORE activation. 
otherwise we should deactivate vstream here - assert(!m_is_activated); - m_vstream_info = std::move(other.m_vstream_info); - m_quant_infos = std::move(other.m_quant_infos); - m_vstream_params = std::move(other.m_vstream_params); - m_measure_pipeline_latency = std::move(other.m_measure_pipeline_latency); - m_entry_element = std::move(other.m_entry_element); - m_pipeline = std::move(other.m_pipeline); - m_is_activated = std::exchange(other.m_is_activated, false); - m_is_aborted = std::exchange(other.m_is_aborted, false); - m_pipeline_status = std::move(other.m_pipeline_status); - m_shutdown_event = std::move(other.m_shutdown_event); - m_core_op_activated_event = std::move(other.m_core_op_activated_event); - m_fps_accumulators = std::move(other.m_fps_accumulators); - m_latency_accumulators = std::move(other.m_latency_accumulators); - m_queue_size_accumulators = std::move(other.m_queue_size_accumulators); - m_pipeline_latency_accumulator = std::move(other.m_pipeline_latency_accumulator); - } - return *this; -} - -hailo_status BaseVStream::start_vstream() -{ - auto status = m_shutdown_event->reset(); - CHECK_SUCCESS(status); - - status = resume(); - CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status, - "Failed to resume stream in {}", name()); - - LOGGER__DEBUG("Activating {}...", name()); - status = m_entry_element->activate(); - CHECK_SUCCESS(status); - - m_is_activated = true; - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::abort() -{ - auto status = m_entry_element->abort(); - CHECK_SUCCESS(status); - m_is_aborted = true; - - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::resume() -{ - auto status = m_entry_element->clear_abort(); - CHECK_SUCCESS(status); - m_is_aborted = false; - - if (m_is_activated) { - status = m_entry_element->activate(); - CHECK_SUCCESS(status); - } - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::stop_vstream() -{ - hailo_status status = HAILO_SUCCESS; - if (m_is_activated) { - m_is_activated = false; - status = m_entry_element->deactivate(); - if (HAILO_SUCCESS != status) { - LOGGER__WARNING("Failed deactivate of vstream {} status {}", name(), status); - } - - // If VStream was aborted, do not clear low-level stream abortion, - // otherwise flush would be called on low-level stream d-tor when there is no receiver. 
- auto should_clear_abort = (!m_is_aborted); - status = m_entry_element->post_deactivate(should_clear_abort); - if (HAILO_SUCCESS != status) { - LOGGER__WARNING("Failed post deactivate of vstream {} status {}", name(), status); - } - } - return status; -} - -hailo_status BaseVStream::stop_and_clear() -{ - auto status = HAILO_SUCCESS; - if (nullptr != m_core_op_activated_event) { - status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT == status, HAILO_INVALID_OPERATION, - "Trying to clear {} vstream before its network group is deactivated", name()); - } - - status = stop_vstream(); - CHECK_SUCCESS(status); - - status = m_entry_element->clear(); - CHECK_SUCCESS(status, "Failed clearing vstream {}", name()); - - const auto curr_pipeline_status = m_pipeline_status->load(); - if (HAILO_SUCCESS != curr_pipeline_status) { - LOGGER__TRACE("Overwriting current pipeline status {}", curr_pipeline_status); - m_pipeline_status->store(HAILO_SUCCESS); - } - - return status; -} - -hailo_status BaseVStream::before_fork() -{ - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::after_fork_in_parent() -{ - return HAILO_SUCCESS; -} - -hailo_status BaseVStream::after_fork_in_child() -{ - return HAILO_SUCCESS; -} - -size_t BaseVStream::get_frame_size() const -{ - return HailoRTCommon::get_frame_size(m_vstream_info, m_vstream_params.user_buffer_format); -} - -const hailo_vstream_info_t &BaseVStream::get_info() const -{ - return m_vstream_info; -} - -const std::vector<hailo_quant_info_t> &BaseVStream::get_quant_infos() const -{ - return m_quant_infos; -} - -const hailo_format_t &BaseVStream::get_user_buffer_format() const -{ - return m_vstream_params.user_buffer_format; -} - -std::string BaseVStream::name() const -{ - return std::string(m_vstream_info.name); -} - -std::string BaseVStream::network_name() const -{ - return std::string(m_vstream_info.network_name); -} - -const std::map<std::string, AccumulatorPtr> &BaseVStream::get_fps_accumulators() const -{ - return m_fps_accumulators; -} - -const std::map<std::string, AccumulatorPtr> &BaseVStream::get_latency_accumulators() const -{ - return m_latency_accumulators; -} - -const std::map<std::string, std::vector<AccumulatorPtr>> &BaseVStream::get_queue_size_accumulators() const -{ - return m_queue_size_accumulators; -} - -AccumulatorPtr BaseVStream::get_pipeline_latency_accumulator() const -{ - return m_pipeline_latency_accumulator; -} - - -const std::vector<std::shared_ptr<PipelineElement>> &BaseVStream::get_pipeline() const -{ - return m_pipeline; -} - -Expected<InputVStream> InputVStream::create(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos, - const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry, - std::shared_ptr<SinkElement> pipeline_exit, std::vector<std::shared_ptr<PipelineElement>> &&pipeline, - std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream_internal = InputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream_internal); - - InputVStream vstream(vstream_internal.release()); - return vstream; -} - -hailo_status InputVStream::write(const MemoryView &buffer) -{ - return m_vstream->write(std::move(buffer)); -}
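// The pix-buffer overload below has three paths: a single plane is forwarded as
// a plain MemoryView, a multi-planar model receives the planes untouched, and
// anything else is flattened into one contiguous allocation. A self-contained
// sketch of the contiguity check and gather-copy; PixBuffer and Plane are
// simplified stand-ins for the hailo_pix_buffer_t layout, not the real structs:
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

namespace sketch_pix_buffer {

struct Plane { void *data; uint32_t bytes_used; };
struct PixBuffer { std::vector<Plane> planes; };

// Returns true when every plane ends exactly where the next one begins,
// i.e. the planes already form one contiguous memory block.
bool planes_are_contiguous(const PixBuffer &buf)
{
    for (std::size_t i = 0; i + 1 < buf.planes.size(); i++) {
        const auto *end = static_cast<const uint8_t *>(buf.planes[i].data) + buf.planes[i].bytes_used;
        if (end != static_cast<const uint8_t *>(buf.planes[i + 1].data)) {
            return false;
        }
    }
    return true;
}

// Gathers all planes into one freshly allocated contiguous buffer.
std::vector<uint8_t> gather_planes(const PixBuffer &buf)
{
    std::size_t total = 0;
    for (const auto &plane : buf.planes) {
        total += plane.bytes_used;
    }
    std::vector<uint8_t> contiguous(total);
    std::size_t offset = 0;
    for (const auto &plane : buf.planes) {
        std::memcpy(contiguous.data() + offset, plane.data, plane.bytes_used);
        offset += plane.bytes_used;
    }
    return contiguous;
}

} // namespace sketch_pix_buffer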
-hailo_status InputVStream::write(const hailo_pix_buffer_t &buffer) -{ - // If only one plane is passed, address it as a MemoryView - if (1 == buffer.number_of_planes) { - return write(MemoryView(buffer.planes[0].user_ptr, buffer.planes[0].bytes_used)); - } - - // If the model is multi-planar, pass the pix buffer as-is - if (m_vstream->is_multi_planar()){ - return m_vstream->write(buffer); - } - - // Other cases - allocate a contiguous buffer to hold all planes - bool is_contiguous = true; - uint32_t planes_total_size = 0; - /* assuming contiguous memory. If not, this will be overridden by the coming loop */ - void *data_ptr = buffer.planes[0].user_ptr; - - /* calculate total data size by summing the planes' sizes and check if the planes are contiguous */ - for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ - auto &plane = buffer.planes[plane_index]; - planes_total_size += plane.bytes_used; - - if (is_contiguous && (plane_index + 1 < buffer.number_of_planes)){ - auto &next_plane = buffer.planes[plane_index+1]; - if ((static_cast<uint8_t*>(plane.user_ptr) + plane.bytes_used) != next_plane.user_ptr){ - is_contiguous = false; - } - } - } - - BufferPtr contiguous_buffer = nullptr; - if (! is_contiguous) { - /* copy to a contiguous buffer, and then pass it */ - auto expected_buffer = Buffer::create_shared(planes_total_size); - CHECK_EXPECTED_AS_STATUS(expected_buffer); - contiguous_buffer = expected_buffer.release(); - uint32_t copied_bytes = 0; - - for (uint32_t plane_index = 0; plane_index < buffer.number_of_planes; plane_index++){ - auto &plane = buffer.planes[plane_index]; - std::memcpy(contiguous_buffer->data() + copied_bytes, plane.user_ptr, plane.bytes_used); - copied_bytes += plane.bytes_used; - } - - data_ptr = contiguous_buffer->data(); - } - - return m_vstream->write(std::move(MemoryView(data_ptr, planes_total_size))); -} - -hailo_status InputVStream::flush() -{ - return m_vstream->flush(); -} - -hailo_status InputVStream::clear(std::vector<InputVStream> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status InputVStream::clear(std::vector<std::reference_wrapper<InputVStream>> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.get().stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.get().start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status InputVStream::abort() -{ - return m_vstream->abort(); -} - -hailo_status InputVStream::resume() -{ - return m_vstream->resume(); -} - -size_t InputVStream::get_frame_size() const -{ - return m_vstream->get_frame_size(); -} - -const hailo_vstream_info_t &InputVStream::get_info() const -{ - return m_vstream->get_info(); -} - -const std::vector<hailo_quant_info_t> &InputVStream::get_quant_infos() const -{ - return m_vstream->get_quant_infos(); -} - -const hailo_format_t &InputVStream::get_user_buffer_format() const -{ - return m_vstream->get_user_buffer_format(); -} - -std::string InputVStream::name() const -{ - return m_vstream->name(); -} - -std::string InputVStream::network_name() const -{ - return m_vstream->network_name(); -} - -const std::map<std::string, AccumulatorPtr> &InputVStream::get_fps_accumulators() const -{ - return m_vstream->get_fps_accumulators(); -} - -const std::map<std::string, AccumulatorPtr> &InputVStream::get_latency_accumulators() const -{ - return m_vstream->get_latency_accumulators(); -} - -const std::map<std::string, std::vector<AccumulatorPtr>> &InputVStream::get_queue_size_accumulators() const -{ - return m_vstream->get_queue_size_accumulators(); -} - -AccumulatorPtr InputVStream::get_pipeline_latency_accumulator() const -{ - return m_vstream->get_pipeline_latency_accumulator(); -} - -const std::vector<std::shared_ptr<PipelineElement>> 
&InputVStream::get_pipeline() const -{ - return m_vstream->get_pipeline(); -} - -hailo_status InputVStream::start_vstream() -{ - return m_vstream->start_vstream(); -} - -hailo_status InputVStream::stop_vstream() -{ - return m_vstream->stop_vstream(); -} - -hailo_status InputVStream::stop_and_clear() -{ - return m_vstream->stop_and_clear(); -} - -std::string InputVStream::get_pipeline_description() const -{ - return m_vstream->get_pipeline_description(); -} - -bool InputVStream::is_aborted() -{ - return m_vstream->is_aborted(); -} - -bool InputVStream::is_multi_planar() -{ - return m_vstream->is_multi_planar(); -} - - -hailo_status InputVStream::before_fork() -{ - return m_vstream->before_fork(); -} - -hailo_status InputVStream::after_fork_in_parent() -{ - return m_vstream->after_fork_in_parent(); -} - -hailo_status InputVStream::after_fork_in_child() -{ - return m_vstream->after_fork_in_child(); -} - -InputVStream::InputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - -Expected OutputVStream::create( - const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream_internal = OutputVStreamInternal::create(vstream_info, quant_infos, vstream_params, pipeline_entry, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream_internal); - - OutputVStream vstream(vstream_internal.release()); - return vstream; -} - -hailo_status OutputVStream::read(MemoryView buffer) -{ - return m_vstream->read(std::move(buffer)); -} - -hailo_status OutputVStream::clear(std::vector &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status OutputVStream::abort() -{ - return m_vstream->abort(); -} - -hailo_status OutputVStream::resume() -{ - return m_vstream->resume(); -} - -hailo_status OutputVStream::clear(std::vector> &vstreams) -{ - for (auto &vstream : vstreams) { - auto status = vstream.get().stop_and_clear(); - CHECK_SUCCESS(status); - } - for (auto &vstream : vstreams) { - auto status = vstream.get().start_vstream(); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -size_t OutputVStream::get_frame_size() const -{ - return m_vstream->get_frame_size(); -} - -const hailo_vstream_info_t &OutputVStream::get_info() const -{ - return m_vstream->get_info(); -} - -const std::vector &OutputVStream::get_quant_infos() const -{ - return m_vstream->get_quant_infos(); -} - -const hailo_format_t &OutputVStream::get_user_buffer_format() const -{ - return m_vstream->get_user_buffer_format(); -} - -std::string OutputVStream::name() const -{ - return m_vstream->name(); -} - -std::string OutputVStream::network_name() const -{ - return m_vstream->network_name(); -} - -const std::map &OutputVStream::get_fps_accumulators() const -{ - return m_vstream->get_fps_accumulators(); -} - -const std::map &OutputVStream::get_latency_accumulators() const -{ - return m_vstream->get_latency_accumulators(); -} - -const std::map> &OutputVStream::get_queue_size_accumulators() const -{ - return m_vstream->get_queue_size_accumulators(); -} - 
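// The clear() overloads above quiesce *every* vstream before restarting any of
// them, so no producer can feed a half-cleared pipeline. A self-contained
// sketch of that two-phase pattern; Stream and its methods are hypothetical
// stand-ins, not the HailoRT types:
#include <vector>

namespace sketch_two_phase_clear {

enum class Status { SUCCESS, FAILURE };

struct Stream {
    Status stop_and_clear() { return Status::SUCCESS; }
    Status start() { return Status::SUCCESS; }
};

Status clear_all(std::vector<Stream> &streams)
{
    // Phase 1: stop and drain everything first.
    for (auto &s : streams) {
        if (Status::SUCCESS != s.stop_and_clear()) {
            return Status::FAILURE;
        }
    }
    // Phase 2: only then bring the whole group back up.
    for (auto &s : streams) {
        if (Status::SUCCESS != s.start()) {
            return Status::FAILURE;
        }
    }
    return Status::SUCCESS;
}

} // namespace sketch_two_phase_clear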
-AccumulatorPtr OutputVStream::get_pipeline_latency_accumulator() const -{ - return m_vstream->get_pipeline_latency_accumulator(); -} - -const std::vector> &OutputVStream::get_pipeline() const -{ - return m_vstream->get_pipeline(); -} - -hailo_status OutputVStream::start_vstream() -{ - return m_vstream->start_vstream(); -} - -hailo_status OutputVStream::stop_vstream() -{ - return m_vstream->stop_vstream(); -} - -hailo_status OutputVStream::stop_and_clear() -{ - return m_vstream->stop_and_clear(); -} - -std::string OutputVStream::get_pipeline_description() const -{ - return m_vstream->get_pipeline_description(); -} - -bool OutputVStream::is_aborted() -{ - return m_vstream->is_aborted(); -} - -hailo_status OutputVStream::before_fork() -{ - return m_vstream->before_fork(); -} - -hailo_status OutputVStream::after_fork_in_parent() -{ - return m_vstream->after_fork_in_parent(); -} - -hailo_status OutputVStream::after_fork_in_child() -{ - return m_vstream->after_fork_in_child(); -} - -hailo_status OutputVStream::set_nms_score_threshold(float32_t threshold) -{ - return m_vstream->set_nms_score_threshold(threshold); -} - -hailo_status OutputVStream::set_nms_iou_threshold(float32_t threshold) -{ - return m_vstream->set_nms_iou_threshold(threshold); -} - -hailo_status OutputVStream::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) -{ - return m_vstream->set_nms_max_proposals_per_class(max_proposals_per_class); -} - -OutputVStream::OutputVStream(std::shared_ptr vstream) : m_vstream(std::move(vstream)) {} - -std::map get_pipeline_accumulators_by_type( - const std::vector> &pipeline, AccumulatorType accumulator_type) -{ - std::map result; - for (const auto &elem : pipeline) { - if (nullptr == elem) { - continue; - } - - AccumulatorPtr accumulator = nullptr; - if (AccumulatorType::FPS == accumulator_type) { - accumulator = elem->get_fps_accumulator(); - } else if (AccumulatorType::LATENCY == accumulator_type) { - accumulator = elem->get_latency_accumulator(); - } else { - continue; - } - - if (nullptr != accumulator) { - result.emplace(elem->name(), accumulator); - } - } - - return result; -} - -std::map> get_pipeline_queue_size_accumulators( - const std::vector> &pipeline) -{ - std::map> result; - for (const auto &elem : pipeline) { - if (nullptr == elem) { - continue; - } - - const auto accumulators = elem->get_queue_size_accumulators(); - if (0 != accumulators.size()) { - result.emplace(elem->name(), accumulators); - } - } - - return result; -} - -Expected> InputVStreamInternal::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, - std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - auto vstream = InputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); - CHECK_EXPECTED(vstream); - auto vstream_ptr = std::shared_ptr(vstream.release()); - return vstream_ptr; -} - -InputVStreamInternal::InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, - EventPtr shutdown_event, AccumulatorPtr 
pipeline_latency_accumulator, EventPtr &&core_op_activated_event, - hailo_status &output_status) : - BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status){} - -Expected> InputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, - std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, - AccumulatorPtr pipeline_latency_accumulator) -{ - hailo_status status = HAILO_UNINITIALIZED; - - if (nullptr != pipeline_latency_accumulator) { - if (pipeline_exit) { - pipeline_exit->sink().set_push_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { - const auto duration_sec = std::chrono::duration_cast>( - std::chrono::steady_clock::now() - metadata.get_start_time()).count(); - pipeline_latency_accumulator->add_data_point(duration_sec); - }); - } - } - - auto vstream_ptr = std::shared_ptr(new InputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), - std::move(pipeline_status), shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), status)); - CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream"); - - return vstream_ptr; -} - -InputVStreamImpl::InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : - InputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ - // TODO: propagate a flag instead of using dynamic_pointer_cast (will be disabled when we'll disable RTTI) - m_is_multi_planar = (nullptr != std::dynamic_pointer_cast(pipeline_entry)); - - if (HAILO_SUCCESS != output_status) { - return; - } - - LOGGER__INFO("Creating {}...", name()); -} - -InputVStreamImpl::~InputVStreamImpl() -{ - (void)stop_vstream(); -} - -hailo_status InputVStreamImpl::write(const MemoryView &buffer) -{ - if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); - auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, - "Trying to write to vstream {} before its network group is activated", name()); - } - - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer, false, nullptr, m_measure_pipeline_latency)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Sending to VStream was shutdown!"); - status = m_pipeline_status->load(); - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - return status; -} - -hailo_status InputVStreamImpl::write(const hailo_pix_buffer_t &buffer) -{ - if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! Virtual stream {} is not activated!", name()); - auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, - "Trying to write to vstream {} before its network group is activated", name()); - } - - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Sending to VStream was shutdown!"); - status = m_pipeline_status->load(); - } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - return status; -} - -hailo_status InputVStreamImpl::flush() -{ - assert(1 == m_entry_element->sinks().size()); - auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(PipelineBuffer::Type::FLUSH)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Sending to VStream was aborted!"); - return HAILO_STREAM_ABORTED_BY_USER; - } - CHECK_SUCCESS(status); - - status = m_entry_element->flush(); - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -bool InputVStreamImpl::is_multi_planar() const -{ - return m_is_multi_planar; -} - -#ifdef HAILO_SUPPORT_MULTI_PROCESS -Expected> InputVStreamClient::create(VStreamIdentifier &&identifier) -{ - grpc::ChannelArguments ch_args; - ch_args.SetMaxReceiveMessageSize(-1); - auto channel = grpc::CreateCustomChannel(hailort::HAILORT_SERVICE_ADDRESS, grpc::InsecureChannelCredentials(), ch_args); - CHECK_AS_EXPECTED(channel != nullptr, HAILO_INTERNAL_FAILURE); - - auto client = make_unique_nothrow(channel); - CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); - - auto user_buffer_format = client->InputVStream_get_user_buffer_format(identifier); - CHECK_EXPECTED(user_buffer_format); - - auto vstream_info = client->InputVStream_get_info(identifier); - CHECK_EXPECTED(vstream_info); - - return std::shared_ptr(new InputVStreamClient(std::move(client), std::move(identifier), - user_buffer_format.release(), vstream_info.release())); -} - -InputVStreamClient::InputVStreamClient(std::unique_ptr client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, - hailo_vstream_info_t &&info) : - m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {} - -InputVStreamClient::~InputVStreamClient() -{ - auto reply = m_client->InputVStream_release(m_identifier, OsUtils::get_curr_pid()); - if (reply != HAILO_SUCCESS) { - 
LOGGER__CRITICAL("InputVStream_release failed!"); - } -} - -hailo_status InputVStreamClient::write(const MemoryView &buffer) -{ - return m_client->InputVStream_write(m_identifier, buffer); -} - -hailo_status InputVStreamClient::write(const hailo_pix_buffer_t &buffer) -{ - return m_client->InputVStream_write(m_identifier, buffer); -} - -hailo_status InputVStreamClient::flush() -{ - return m_client->InputVStream_flush(m_identifier); -} - -bool InputVStreamClient::is_multi_planar() const -{ - auto is_multi_planar_exp = m_client->InputVStream_is_multi_planar(m_identifier); - if (!is_multi_planar_exp) { - LOGGER__CRITICAL("InputVStream_is_multi_planar failed with status={}", is_multi_planar_exp.status()); - return true; - } - return is_multi_planar_exp.release(); -} - -hailo_status InputVStreamClient::abort() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto abort_client = expected_client.release(); - return abort_client->InputVStream_abort(m_identifier); -} - -hailo_status InputVStreamClient::resume() -{ - return m_client->InputVStream_resume(m_identifier); -} - -hailo_status InputVStreamClient::stop_and_clear() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto stop_and_clear_client = expected_client.release(); - - return stop_and_clear_client->InputVStream_stop_and_clear(m_identifier); -} - -hailo_status InputVStreamClient::start_vstream() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto start_vstream_client = expected_client.release(); - - return start_vstream_client->InputVStream_start_vstream(m_identifier); -} - -size_t InputVStreamClient::get_frame_size() const -{ - auto frame_size = m_client->InputVStream_get_frame_size(m_identifier); - if (!frame_size) { - LOGGER__CRITICAL("InputVStream_get_frame_size failed with status={}", frame_size.status()); - return 0; - } - return frame_size.release(); -} - -const hailo_vstream_info_t &InputVStreamClient::get_info() const -{ - return m_info; -} - -const hailo_format_t &InputVStreamClient::get_user_buffer_format() const -{ - return m_user_buffer_format; -} - -std::string InputVStreamClient::name() const -{ - auto expected_name = m_client->InputVStream_name(m_identifier); - if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); - return ""; - } - return expected_name.release(); -} + AccumulatorPtr accumulator = nullptr; + if (AccumulatorType::FPS == accumulator_type) { + accumulator = elem->get_fps_accumulator(); + } else if (AccumulatorType::LATENCY == accumulator_type) { + accumulator = elem->get_latency_accumulator(); + } else { + continue; + } -std::string InputVStreamClient::network_name() const -{ - auto expected_name = m_client->InputVStream_network_name(m_identifier); - if (!expected_name) { - LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); - return ""; + if (nullptr != accumulator) { + result.emplace(elem->name(), accumulator); + } } - return expected_name.release(); -} - -const std::map &InputVStreamClient::get_fps_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_fps_accumulators function is not supported when using multi-process service"); - return m_fps_accumulators; -} -const std::map &InputVStreamClient::get_latency_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_latency_accumulators function is not supported when 
using multi-process service"); - return m_latency_accumulators; -} - -const std::map> &InputVStreamClient::get_queue_size_accumulators() const -{ - LOGGER__ERROR("InputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); - return m_queue_size_accumulators; -} -AccumulatorPtr InputVStreamClient::get_pipeline_latency_accumulator() const -{ - LOGGER__ERROR("InputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); - return m_pipeline_latency_accumulator; -} -const std::vector> &InputVStreamClient::get_pipeline() const -{ - LOGGER__ERROR("InputVStream::get_pipeline function is not supported when using multi-process service"); - return m_pipeline; -} - -hailo_status InputVStreamClient::create_client() -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - m_client = expected_client.release(); - return HAILO_SUCCESS; -} - -hailo_status InputVStreamClient::before_fork() -{ - m_client.reset(); - return HAILO_SUCCESS; -} -hailo_status InputVStreamClient::after_fork_in_parent() -{ - return create_client(); + return result; } -hailo_status InputVStreamClient::after_fork_in_child() +std::map> get_pipeline_queue_size_accumulators( + const std::vector> &pipeline) { - return create_client(); -} + std::map> result; + for (const auto &elem : pipeline) { + if (nullptr == elem) { + continue; + } -bool InputVStreamClient::is_aborted() -{ - auto is_aborted_exp = m_client->InputVStream_is_aborted(m_identifier); - if (!is_aborted_exp) { - LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status()); - return true; + const auto accumulators = elem->get_queue_size_accumulators(); + if (0 != accumulators.size()) { + result.emplace(elem->name(), accumulators); + } } - return is_aborted_exp.release(); -} - -#endif // HAILO_SUPPORT_MULTI_PROCESS -std::string InputVStreamInternal::get_pipeline_description() const -{ - std::stringstream pipeline_str; - pipeline_str << "Input pipeline '" << name() << "': "; - for (const auto &element : m_pipeline) { - pipeline_str << element->description() << " >> "; - } - pipeline_str << "HW"; - return pipeline_str.str(); + return result; } -Expected> OutputVStreamInternal::create( - const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) +Expected> InputVStreamInternal::create(const hailo_vstream_info_t &vstream_info, + const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { - auto vstream = OutputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, - std::move(pipeline), std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator); + auto vstream = InputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry, pipeline_exit, + std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator); CHECK_EXPECTED(vstream); - auto vstream_ptr = std::shared_ptr(vstream.release()); + auto 
vstream_ptr = std::shared_ptr(vstream.release()); return vstream_ptr; } -OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, - std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : +InputVStreamInternal::InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, + const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, + hailo_status &output_status) : BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ - // Reversing the order of pipeline-elements, for the destruction flow to work in the right order (from user-side to hw-side) - std::reverse(m_pipeline.begin(), m_pipeline.end()); -} + pipeline_latency_accumulator, std::move(core_op_activated_event), output_status){} -Expected> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, - const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator) +Expected> InputVStreamImpl::create(const hailo_vstream_info_t &vstream_info, + const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, + std::shared_ptr pipeline_exit, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, + AccumulatorPtr pipeline_latency_accumulator) { hailo_status status = HAILO_UNINITIALIZED; - CHECK_AS_EXPECTED(1 == pipeline_entry->sources().size(), HAILO_INVALID_ARGUMENT, - "OutputVStream's entry element is expected to have one source"); - if (nullptr != pipeline_latency_accumulator) { - pipeline_entry->sources()[0].set_pull_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { - const auto duration_sec = std::chrono::duration_cast>( - std::chrono::steady_clock::now() - metadata.get_start_time()).count(); - pipeline_latency_accumulator->add_data_point(duration_sec); - }); + if (pipeline_exit) { + pipeline_exit->sink().set_push_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) { + const auto duration_sec = std::chrono::duration_cast>( + std::chrono::steady_clock::now() - metadata.get_start_time()).count(); + pipeline_latency_accumulator->add_data_point(duration_sec); + }); + } } - auto vstream_ptr = std::shared_ptr(new OutputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), - std::move(pipeline_status), shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), status)); + auto vstream_ptr = std::shared_ptr(new InputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry), std::move(pipeline), + std::move(pipeline_status), pipeline_latency_accumulator, std::move(core_op_activated_event), status)); 
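// The push-complete callback registered above timestamps each buffer at
// pipeline entry and records (now - start) when the exit pad finishes pushing
// it, yielding one end-to-end latency sample per frame. A standalone sketch of
// that hook; Metadata and LatencyAccumulator are illustrative stand-ins:
#include <chrono>
#include <functional>
#include <memory>
#include <vector>

namespace sketch_latency_hook {

using Clock = std::chrono::steady_clock;

struct Metadata { Clock::time_point start_time; };

struct LatencyAccumulator {
    std::vector<double> samples_sec;
    void add_data_point(double sec) { samples_sec.push_back(sec); }
};

std::function<void(const Metadata &)> make_latency_hook(std::shared_ptr<LatencyAccumulator> acc)
{
    // Capture the accumulator by value so the hook keeps it alive.
    return [acc](const Metadata &metadata) {
        const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
            Clock::now() - metadata.start_time).count();
        acc->add_data_point(duration_sec);
    };
}

} // namespace sketch_latency_hook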
CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream"); return vstream_ptr; } -std::string OutputVStreamInternal::get_pipeline_description() const +InputVStreamImpl::InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, + std::shared_ptr pipeline_entry, std::vector> &&pipeline, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, + EventPtr core_op_activated_event, hailo_status &output_status) : + InputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), + pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) { - std::stringstream pipeline_str; - pipeline_str << "Output pipeline '" << name() << "': HW"; - for (const auto &element : m_pipeline) { - pipeline_str << " >> " << element->description(); - } - return pipeline_str.str(); -} + // TODO: propagate a flag instead of using dynamic_pointer_cast (will be disabled when we'll disable RTTI) + m_is_multi_planar = (nullptr != std::dynamic_pointer_cast(pipeline_entry)); -OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, - const hailo_vstream_params_t &vstream_params, - std::shared_ptr pipeline_entry, - std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, - AccumulatorPtr pipeline_latency_accumulator, - EventPtr core_op_activated_event, hailo_status &output_status) : - OutputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status), - shutdown_event, pipeline_latency_accumulator, std::move(core_op_activated_event), output_status) -{ if (HAILO_SUCCESS != output_status) { return; } @@ -2187,85 +754,80 @@ OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, c LOGGER__INFO("Creating {}...", name()); } -OutputVStreamImpl::~OutputVStreamImpl() +InputVStreamImpl::~InputVStreamImpl() { (void)stop_vstream(); } -hailo_status OutputVStreamImpl::read(MemoryView buffer) +hailo_status InputVStreamImpl::write(const MemoryView &buffer) { if (nullptr != m_core_op_activated_event) { - CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "read() failed! Virtual stream {} is not activated!", name()); + CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); - if (HAILO_TIMEOUT == status) { - LOGGER__INFO("Trying to read from vstream {} before its network_group is activated", name()); - return HAILO_NETWORK_GROUP_NOT_ACTIVATED; - } - CHECK_SUCCESS(status); + CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, + "Trying to write to vstream {} before its network group is activated", name()); } - assert(1 == m_entry_element->sources().size()); - auto recv_buffer = m_entry_element->sources()[0].run_pull(PipelineBuffer(buffer, false, nullptr, m_measure_pipeline_latency)); - auto status = recv_buffer.status(); + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer, [](hailo_status){}, HAILO_SUCCESS, false, nullptr, m_measure_pipeline_latency)); if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { - LOGGER__INFO("Receiving to VStream was shutdown!"); + LOGGER__INFO("Sending to VStream was shutdown!"); status = m_pipeline_status->load(); } - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Receiving to VStream was aborted!"); - m_entry_element->wait_for_finish(); - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; } return status; } -hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold) +hailo_status InputVStreamImpl::write(const hailo_pix_buffer_t &buffer) { - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_score_threshold(threshold); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } - } - CHECK_SUCCESS(status, "Unable to set NMS score threshold in {}", name()); - - return HAILO_SUCCESS; -} + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); -hailo_status OutputVStreamImpl::set_nms_iou_threshold(float32_t threshold) -{ - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_iou_threshold(threshold); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } + if (nullptr != m_core_op_activated_event) { + CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "Failed to write buffer! 
Virtual stream {} is not activated!", name()); + auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0)); + CHECK(HAILO_TIMEOUT != status, HAILO_NETWORK_GROUP_NOT_ACTIVATED, + "Trying to write to vstream {} before its network group is activated", name()); } - CHECK_SUCCESS(status, "Unable to set NMS IoU threshold in {}", name()); - return HAILO_SUCCESS; + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(buffer)); + if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) { + LOGGER__INFO("Sending to VStream was shutdown!"); + status = m_pipeline_status->load(); + } + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; + } + return status; } -hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) +hailo_status InputVStreamImpl::flush() { - auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element - for (auto &elem : m_pipeline) { - auto elem_status = elem->set_nms_max_proposals_per_class(max_proposals_per_class); - if (HAILO_SUCCESS == elem_status) { - status = elem_status; // 1 element is enough to call this setter successful - } + assert(1 == m_entry_element->sinks().size()); + auto status = m_entry_element->sinks()[0].run_push(PipelineBuffer(PipelineBuffer::Type::FLUSH)); + if (HAILO_STREAM_ABORT == status) { + LOGGER__INFO("Sending to VStream was aborted!"); + return HAILO_STREAM_ABORT; } - CHECK_SUCCESS(status, "Unable to set NMS max proposals per class in {}", name()); + CHECK_SUCCESS(status); - // Update vstream info - m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; + status = m_entry_element->flush(); + CHECK_SUCCESS(status); return HAILO_SUCCESS; } +bool InputVStreamImpl::is_multi_planar() const +{ + return m_is_multi_planar; +} + #ifdef HAILO_SUPPORT_MULTI_PROCESS -Expected> OutputVStreamClient::create(const VStreamIdentifier &&identifier) +Expected> InputVStreamClient::create(VStreamIdentifier &&identifier) { grpc::ChannelArguments ch_args; ch_args.SetMaxReceiveMessageSize(-1); @@ -2275,132 +837,152 @@ Expected> OutputVStreamClient::create(const auto client = make_unique_nothrow(channel); CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY); - auto user_buffer_format = client->OutputVStream_get_user_buffer_format(identifier); + auto user_buffer_format = client->InputVStream_get_user_buffer_format(identifier); CHECK_EXPECTED(user_buffer_format); - auto info = client->OutputVStream_get_info(identifier); - CHECK_EXPECTED(info); + auto vstream_info = client->InputVStream_get_info(identifier); + CHECK_EXPECTED(vstream_info); - return std::shared_ptr(new OutputVStreamClient(std::move(client), std::move(identifier), - user_buffer_format.release(), info.release())); + return std::shared_ptr(new InputVStreamClient(std::move(client), std::move(identifier), + user_buffer_format.release(), vstream_info.release())); } -OutputVStreamClient::OutputVStreamClient(std::unique_ptr client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, +InputVStreamClient::InputVStreamClient(std::unique_ptr client, VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, hailo_vstream_info_t &&info) : m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {} -OutputVStreamClient::~OutputVStreamClient() +InputVStreamClient::~InputVStreamClient() { - auto reply = 
m_client->OutputVStream_release(m_identifier, OsUtils::get_curr_pid()); + auto reply = m_client->InputVStream_release(m_identifier, OsUtils::get_curr_pid()); if (reply != HAILO_SUCCESS) { - LOGGER__CRITICAL("OutputVStream_release failed!"); + LOGGER__CRITICAL("InputVStream_release failed!"); } } -hailo_status OutputVStreamClient::read(MemoryView buffer) +hailo_status InputVStreamClient::write(const MemoryView &buffer) { - return m_client->OutputVStream_read(m_identifier, buffer); + return m_client->InputVStream_write(m_identifier, buffer); } -hailo_status OutputVStreamClient::abort() +hailo_status InputVStreamClient::write(const hailo_pix_buffer_t &buffer) +{ + return m_client->InputVStream_write(m_identifier, buffer); +} + +hailo_status InputVStreamClient::flush() +{ + return m_client->InputVStream_flush(m_identifier); +} + +bool InputVStreamClient::is_multi_planar() const +{ + auto is_multi_planar_exp = m_client->InputVStream_is_multi_planar(m_identifier); + if (!is_multi_planar_exp) { + LOGGER__CRITICAL("InputVStream_is_multi_planar failed with status={}", is_multi_planar_exp.status()); + return true; + } + return is_multi_planar_exp.release(); +} + +hailo_status InputVStreamClient::abort() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto abort_client = expected_client.release(); - return abort_client->OutputVStream_abort(m_identifier); + return abort_client->InputVStream_abort(m_identifier); } -hailo_status OutputVStreamClient::resume() +hailo_status InputVStreamClient::resume() { - return m_client->OutputVStream_resume(m_identifier); + return m_client->InputVStream_resume(m_identifier); } -hailo_status OutputVStreamClient::stop_and_clear() +hailo_status InputVStreamClient::stop_and_clear() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto stop_and_clear_client = expected_client.release(); - return stop_and_clear_client->OutputVStream_stop_and_clear(m_identifier); + return stop_and_clear_client->InputVStream_stop_and_clear(m_identifier); } -hailo_status OutputVStreamClient::start_vstream() +hailo_status InputVStreamClient::start_vstream() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); auto start_vstream_client = expected_client.release(); - return start_vstream_client->OutputVStream_start_vstream(m_identifier); + return start_vstream_client->InputVStream_start_vstream(m_identifier); } -size_t OutputVStreamClient::get_frame_size() const +size_t InputVStreamClient::get_frame_size() const { - auto frame_size = m_client->OutputVStream_get_frame_size(m_identifier); + auto frame_size = m_client->InputVStream_get_frame_size(m_identifier); if (!frame_size) { - LOGGER__CRITICAL("OutputVStream_get_frame_size failed with status={}", frame_size.status()); + LOGGER__CRITICAL("InputVStream_get_frame_size failed with status={}", frame_size.status()); return 0; } return frame_size.release(); } -const hailo_vstream_info_t &OutputVStreamClient::get_info() const +const hailo_vstream_info_t &InputVStreamClient::get_info() const { return m_info; } -const hailo_format_t &OutputVStreamClient::get_user_buffer_format() const +const hailo_format_t &InputVStreamClient::get_user_buffer_format() const { return m_user_buffer_format; } -std::string OutputVStreamClient::name() const +std::string InputVStreamClient::name() const { - auto expected_name = m_client->OutputVStream_name(m_identifier); + auto expected_name = 
m_client->InputVStream_name(m_identifier); if (!expected_name) { - LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); } -std::string OutputVStreamClient::network_name() const +std::string InputVStreamClient::network_name() const { - auto expected_name = m_client->OutputVStream_network_name(m_identifier); + auto expected_name = m_client->InputVStream_network_name(m_identifier); if (!expected_name) { - LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status()); + LOGGER__CRITICAL("InputVStream_name failed with status={}", expected_name.status()); return ""; } return expected_name.release(); } -const std::map &OutputVStreamClient::get_fps_accumulators() const +const std::map &InputVStreamClient::get_fps_accumulators() const { - LOGGER__ERROR("OutputVStream::get_fps_accumulators function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_fps_accumulators function is not supported when using multi-process service"); return m_fps_accumulators; } -const std::map &OutputVStreamClient::get_latency_accumulators() const +const std::map &InputVStreamClient::get_latency_accumulators() const { - LOGGER__ERROR("OutputVStream::get_latency_accumulators functoin is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_latency_accumulators function is not supported when using multi-process service"); return m_latency_accumulators; } -const std::map> &OutputVStreamClient::get_queue_size_accumulators() const +const std::map> &InputVStreamClient::get_queue_size_accumulators() const { - LOGGER__ERROR("OutputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_queue_size_accumulators function is not supported when using multi-process service"); return m_queue_size_accumulators; } -AccumulatorPtr OutputVStreamClient::get_pipeline_latency_accumulator() const +AccumulatorPtr InputVStreamClient::get_pipeline_latency_accumulator() const { - LOGGER__ERROR("OutputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service"); return m_pipeline_latency_accumulator; } -const std::vector> &OutputVStreamClient::get_pipeline() const +const std::vector> &InputVStreamClient::get_pipeline() const { - LOGGER__ERROR("OutputVStream::get_pipeline function is not supported when using multi-process service"); + LOGGER__ERROR("InputVStream::get_pipeline function is not supported when using multi-process service"); return m_pipeline; } -hailo_status OutputVStreamClient::create_client() +hailo_status InputVStreamClient::create_client() { auto expected_client = HailoRtRpcClientUtils::create_client(); CHECK_EXPECTED_AS_STATUS(expected_client); @@ -2408,831 +990,448 @@ hailo_status OutputVStreamClient::create_client() return HAILO_SUCCESS; } -hailo_status OutputVStreamClient::before_fork() +hailo_status InputVStreamClient::before_fork() { m_client.reset(); return HAILO_SUCCESS; } -hailo_status OutputVStreamClient::after_fork_in_parent() +hailo_status InputVStreamClient::after_fork_in_parent() { return create_client(); } -hailo_status OutputVStreamClient::after_fork_in_child() +hailo_status InputVStreamClient::after_fork_in_child() { return 
create_client(); } -bool OutputVStreamClient::is_aborted() +bool InputVStreamClient::is_aborted() { - auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_identifier); + auto is_aborted_exp = m_client->InputVStream_is_aborted(m_identifier); if (!is_aborted_exp) { - LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status()); + LOGGER__CRITICAL("InputVStream_is_aborted failed with status={}", is_aborted_exp.status()); return true; } return is_aborted_exp.release(); } -hailo_status OutputVStreamClient::set_nms_score_threshold(float32_t threshold) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_score_threshold(m_identifier, threshold)); - - return HAILO_SUCCESS; -} - -hailo_status OutputVStreamClient::set_nms_iou_threshold(float32_t threshold) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_iou_threshold(m_identifier, threshold)); - - return HAILO_SUCCESS; -} - -hailo_status OutputVStreamClient::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) -{ - auto expected_client = HailoRtRpcClientUtils::create_client(); - CHECK_EXPECTED_AS_STATUS(expected_client); - auto vstream_client = expected_client.release(); - - CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_proposals_per_class(m_identifier, max_proposals_per_class)); - m_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; - - return HAILO_SUCCESS; -} - #endif // HAILO_SUPPORT_MULTI_PROCESS -Expected> HwReadElement::create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, - size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction) -{ - auto buffer_pool = BufferPool::create(stream->get_frame_size(), buffer_pool_size, shutdown_event, elem_flags, vstream_flags); - CHECK_EXPECTED(buffer_pool, "Failed creating BufferPool for {}", name); - - // On HwReadElement the stream always owns the buffer, hence, we set the mode explicitly. 
-    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
-    CHECK_SUCCESS_AS_EXPECTED(status);
-
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
-
-    auto hw_read_elem_ptr = make_shared_nothrow(stream, buffer_pool.release(), name, timeout,
-        duration_collector.release(), shutdown_event, std::move(pipeline_status), pipeline_direction);
-    CHECK_AS_EXPECTED(nullptr != hw_read_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
-
-    LOGGER__INFO("Created {}", hw_read_elem_ptr->name());
-
-    return hw_read_elem_ptr;
-}
-
-HwReadElement::HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name,
-    std::chrono::milliseconds timeout, DurationCollector &&duration_collector,
-    EventPtr shutdown_event, std::shared_ptr> &&pipeline_status,
-    PipelineDirection pipeline_direction) :
-    SourceElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
-    m_stream(stream),
-    m_pool(buffer_pool),
-    m_timeout(timeout),
-    m_shutdown_event(shutdown_event),
-    m_activation_wait_or_shutdown(stream->get_core_op_activated_event(), shutdown_event)
-{}
-
-uint32_t HwReadElement::get_invalid_frames_count()
-{
-    return m_stream->get_invalid_frames_count();
-}
-
-std::string HwReadElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
-
-    return element_description.str();
-}
-
-hailo_status HwReadElement::execute_post_deactivate(bool should_clear_abort)
-{
-    if (should_clear_abort) {
-        auto status = execute_clear_abort();
-        CHECK(((HAILO_SUCCESS == status) || (HAILO_STREAM_NOT_ACTIVATED == status)), status,
-            "Failed to clear abort stream in {}", name());
-    }
-    return HAILO_SUCCESS;
-}
-
-hailo_status HwReadElement::execute_clear()
-{
-    return HAILO_SUCCESS;
-}
-
-hailo_status HwReadElement::execute_flush()
-{
-    return HAILO_INVALID_OPERATION;
-}
-
-hailo_status HwReadElement::execute_abort()
-{
-    return m_stream->abort_impl();
-}
-
-hailo_status HwReadElement::execute_clear_abort()
-{
-    return m_stream->clear_abort_impl();
-}
-
-hailo_status HwReadElement::execute_wait_for_finish()
-{
-    return HAILO_SUCCESS;
-}
-
-std::vector HwReadElement::get_queue_size_accumulators()
+std::string InputVStreamInternal::get_pipeline_description() const
 {
-    if (nullptr == m_pool->get_queue_size_accumulator()) {
-        return std::vector();
+    std::stringstream pipeline_str;
+    pipeline_str << "Input pipeline '" << name() << "': ";
+    for (const auto &element : m_pipeline) {
+        pipeline_str << element->description() << " >> ";
     }
-    return {m_pool->get_queue_size_accumulator()};
-}
-
-void HwReadElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
-{
-    LOGGER__ERROR("run_push_async is not supported for {}", name());
-    assert(false);
+    pipeline_str << "HW";
+    return pipeline_str.str();
 }
 
-hailo_status HwReadElement::run_push(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
+Expected<std::shared_ptr<OutputVStreamInternal>> OutputVStreamInternal::create(const hailo_vstream_info_t &vstream_info,
+    const std::vector<hailo_quant_info_t> &quant_infos, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator)
 {
-    return HAILO_INVALID_OPERATION;
+    auto vstream = OutputVStreamImpl::create(vstream_info, quant_infos, vstream_params, pipeline_entry,
+        std::move(pipeline), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator);
+    CHECK_EXPECTED(vstream);
+    auto vstream_ptr = std::shared_ptr<OutputVStreamInternal>(vstream.release());
+    return vstream_ptr;
 }
 
-Expected HwReadElement::run_pull(PipelineBuffer &&optional, const PipelinePad &/*source*/)
+OutputVStreamInternal::OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos,
+    const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry,
+    std::vector<std::shared_ptr<PipelineElement>> &&pipeline, std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status) :
+    BaseVStream(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status),
+        pipeline_latency_accumulator, std::move(core_op_activated_event), output_status)
 {
-    auto buffer = m_pool->get_available_buffer(std::move(optional), m_timeout);
-    if (HAILO_SHUTDOWN_EVENT_SIGNALED == buffer.status()) {
-        return make_unexpected(buffer.status());
-    }
-    CHECK_EXPECTED(buffer, "{} (D2H) failed with status={}", name(), buffer.status());
-
-    while (true) {
-        if (!m_stream->is_scheduled()) {
-            auto status = m_activation_wait_or_shutdown.wait(m_timeout);
-            if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
-                return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
-            }
-            if (HAILO_TIMEOUT == status) {
-                return make_unexpected(HAILO_NETWORK_GROUP_NOT_ACTIVATED);
-            }
-            CHECK_SUCCESS_AS_EXPECTED(status);
-        } else {
-            auto status = m_activation_wait_or_shutdown.wait(std::chrono::milliseconds(0));
-            if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
-                return make_unexpected(HAILO_SHUTDOWN_EVENT_SIGNALED);
-            }
-        }
-
-        MemoryView buffer_view(buffer.value().as_view());
-        m_duration_collector.start_measurement();
-        auto status = m_stream->read(buffer_view);
-        if (HAILO_INVALID_FRAME == status) {
-            m_stream->increase_invalid_frames_count(1);
-            status = HAILO_SUCCESS;
-        }
-        if (HAILO_STREAM_NOT_ACTIVATED == status) {
-            // Try again
-            continue;
-        }
-        if (HAILO_STREAM_ABORTED_BY_USER == status) {
-            LOGGER__INFO("Reading from stream was aborted!");
-            return make_unexpected(HAILO_STREAM_ABORTED_BY_USER);
-        }
-        CHECK_SUCCESS_AS_EXPECTED(status, "{} (D2H) failed with status={}", name(), status);
-        m_duration_collector.complete_measurement();
-
-        return buffer.release();
-    }
+    // Reverse the order of the pipeline elements so that destruction runs in the right order (from user side to HW side)
+    std::reverse(m_pipeline.begin(), m_pipeline.end());
 }
 
-hailo_status HwReadElement::execute_activate()
+Expected<std::shared_ptr<OutputVStreamImpl>> OutputVStreamImpl::create(const hailo_vstream_info_t &vstream_info,
+    const std::vector<hailo_quant_info_t> &quant_infos, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status,
+    EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator)
 {
-    return HAILO_SUCCESS;
-}
+    hailo_status status = HAILO_UNINITIALIZED;
 
-hailo_status HwReadElement::execute_deactivate()
-{
-    auto signal_shutdown_status = m_shutdown_event->signal();
-    if (HAILO_SUCCESS != signal_shutdown_status) {
-        LOGGER__ERROR("Signaling {} shutdown event failed with {}", name(), signal_shutdown_status);
-    }
+    CHECK_AS_EXPECTED(1 == pipeline_entry->sources().size(), HAILO_INVALID_ARGUMENT,
+        "OutputVStream's entry element is expected to have one source");
 
-    auto abort_status = execute_abort();
-    if ((HAILO_SUCCESS != abort_status) && (HAILO_STREAM_NOT_ACTIVATED != abort_status)) {
-        LOGGER__ERROR("Abort {} failed with {}", name(), abort_status);
-        return abort_status;
+    if (nullptr != pipeline_latency_accumulator) {
+        pipeline_entry->sources()[0].set_pull_complete_callback([pipeline_latency_accumulator](const PipelineBuffer::Metadata& metadata) {
+            const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
+                std::chrono::steady_clock::now() - metadata.get_start_time()).count();
+            pipeline_latency_accumulator->add_data_point(duration_sec);
+        });
     }
 
-    return signal_shutdown_status;
+    auto vstream_ptr = std::shared_ptr<OutputVStreamImpl>(new OutputVStreamImpl(vstream_info, quant_infos, vstream_params, std::move(pipeline_entry),
+        std::move(pipeline), std::move(pipeline_status), pipeline_latency_accumulator, std::move(core_op_activated_event), status));
+    CHECK_SUCCESS_AS_EXPECTED(status, "Failed to create virtual stream");
+
+    return vstream_ptr;
 }
 
-Expected> HwWriteElement::create(std::shared_ptr stream, const std::string &name,
-    hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status,
-    PipelineDirection pipeline_direction)
+std::string OutputVStreamInternal::get_pipeline_description() const
 {
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
-
-    auto got_flush_event = Event::create_shared(Event::State::not_signalled);
-    CHECK_EXPECTED(got_flush_event);
+    // Elements are saved in reverse order (for destruction), so we reverse again before printing.
+    std::vector<std::shared_ptr<PipelineElement>> reversed_pipeline;
+    std::reverse_copy(m_pipeline.begin(), m_pipeline.end(), std::back_inserter(reversed_pipeline));
 
-    // On HwWriteElement the stream always owns the buffer, hence, we set the mode explicitly.
-    auto status = stream->set_buffer_mode(StreamBufferMode::OWNING);
-    CHECK_SUCCESS_AS_EXPECTED(status);
-
-    auto hw_write_elem_ptr = make_shared_nothrow(stream, name,
-        duration_collector.release(), std::move(pipeline_status), got_flush_event.release(), pipeline_direction);
-    CHECK_AS_EXPECTED(nullptr != hw_write_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+    std::stringstream pipeline_str;
+    pipeline_str << "Output pipeline '" << name() << "': HW";
+    for (const auto &element : reversed_pipeline) {
+        pipeline_str << " >> " << element->description();
+    }
+    return pipeline_str.str();
+}
 
-    LOGGER__INFO("Created {}", hw_write_elem_ptr->name());
+OutputVStreamImpl::OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector<hailo_quant_info_t> &quant_infos,
+    const hailo_vstream_params_t &vstream_params, std::shared_ptr<PipelineElement> pipeline_entry, std::vector<std::shared_ptr<PipelineElement>> &&pipeline,
+    std::shared_ptr<std::atomic<hailo_status>> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator,
+    EventPtr core_op_activated_event, hailo_status &output_status) :
+    OutputVStreamInternal(vstream_info, quant_infos, vstream_params, pipeline_entry, std::move(pipeline), std::move(pipeline_status),
+        pipeline_latency_accumulator, std::move(core_op_activated_event), output_status)
+{
+    if (HAILO_SUCCESS != output_status) {
+        return;
+    }
 
-    return hw_write_elem_ptr;
+    LOGGER__INFO("Creating {}...", name());
 }
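The set_pull_complete_callback hook above is the entire pipeline-latency mechanism: the entry pad stamps a start time into the buffer metadata when a frame enters the pipeline, and the callback converts the elapsed steady-clock time into seconds and feeds the accumulator. The same pattern in isolation, as a minimal self-contained sketch (LatencyAccumulator is a stand-in type, not a HailoRT class):

    #include <chrono>
    #include <functional>
    #include <vector>

    // Stand-in for HailoRT's AccumulatorPtr; it only collects latency samples.
    struct LatencyAccumulator {
        std::vector<double> samples;
        void add_data_point(double seconds) { samples.push_back(seconds); }
    };

    int main()
    {
        LatencyAccumulator accumulator;
        // The entry element records the start time when the frame enters the pipeline...
        const auto start_time = std::chrono::steady_clock::now();
        // ...and the pull-complete callback measures the elapsed time once the user pulls the result.
        std::function<void()> pull_complete = [&accumulator, start_time]() {
            const auto duration_sec = std::chrono::duration_cast<std::chrono::duration<double>>(
                std::chrono::steady_clock::now() - start_time).count();
            accumulator.add_data_point(duration_sec);
        };
        pull_complete();
        return 0;
    }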
-HwWriteElement::HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector,
-    std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction) :
-    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr),
-    m_stream(stream), m_got_flush_event(got_flush_event)
-{}
-
-Expected HwWriteElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+OutputVStreamImpl::~OutputVStreamImpl()
 {
-    return make_unexpected(HAILO_INVALID_OPERATION);
+    (void)stop_vstream();
 }
 
-hailo_status HwWriteElement::run_push(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
+hailo_status OutputVStreamImpl::read(MemoryView buffer)
 {
-    if (PipelineBuffer::Type::FLUSH == buffer.get_type()) {
-        hailo_status flush_status = m_stream->flush();
-        if (HAILO_STREAM_ABORTED_BY_USER == flush_status) {
-            LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
-        } else if (HAILO_SUCCESS != flush_status) {
-            LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    if (nullptr != m_core_op_activated_event) {
+        CHECK(m_is_activated, HAILO_VSTREAM_PIPELINE_NOT_ACTIVATED, "read() failed! Virtual stream {} is not activated!", name());
+        auto status = m_core_op_activated_event->wait(std::chrono::milliseconds(0));
+        if (HAILO_TIMEOUT == status) {
+            LOGGER__INFO("Trying to read from vstream {} before its network_group is activated", name());
+            return HAILO_NETWORK_GROUP_NOT_ACTIVATED;
         }
-        hailo_status status = m_got_flush_event->signal();
         CHECK_SUCCESS(status);
-        return HAILO_SUCCESS;
     }
 
-    m_duration_collector.start_measurement();
-    const auto status = m_stream->write(MemoryView(buffer.data(), buffer.size()));
-    m_duration_collector.complete_measurement();
-
-    if (HAILO_STREAM_ABORTED_BY_USER == status) {
-        LOGGER__INFO("Failed to send on input stream {} because stream was aborted", m_stream->to_string());
-        return HAILO_STREAM_ABORTED_BY_USER;
+    assert(1 == m_entry_element->sources().size());
+    auto recv_buffer = m_entry_element->sources()[0].run_pull(PipelineBuffer(buffer, [](hailo_status){}, HAILO_SUCCESS, false, nullptr, m_measure_pipeline_latency));
+    auto status = recv_buffer.status();
+    if (HAILO_SHUTDOWN_EVENT_SIGNALED == status) {
+        LOGGER__INFO("Receiving from VStream was shut down!");
+        status = m_pipeline_status->load();
     }
-    CHECK_SUCCESS(status, "{} (H2D) failed with status={}", name(), status);
-
-    return HAILO_SUCCESS;
-}
-
-void HwWriteElement::run_push_async(PipelineBuffer &&/*buffer*/, const PipelinePad &/*sink*/)
-{
-    LOGGER__ERROR("run_push_async is not supported for {}", name());
-    assert(false);
-}
-
-hailo_status HwWriteElement::execute_activate()
-{
-    return HAILO_SUCCESS;
+
+    return status;
 }
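For context, read() above surfaces three outcomes to the caller: success, HAILO_NETWORK_GROUP_NOT_ACTIVATED when the zero-timeout activation wait times out, and the stored pipeline status after a shutdown. A hedged caller-side sketch (the read_one_frame helper is illustrative and not part of this change; the header path is assumed):

    #include <cstdint>
    #include <vector>
    #include "hailo/hailort.hpp" // assumed public HailoRT header

    // Illustrative helper: pull one frame and distinguish the statuses read() can return.
    hailo_status read_one_frame(hailort::OutputVStream &output_vstream)
    {
        std::vector<uint8_t> buffer(output_vstream.get_frame_size());
        auto status = output_vstream.read(hailort::MemoryView(buffer.data(), buffer.size()));
        if (HAILO_NETWORK_GROUP_NOT_ACTIVATED == status) {
            // The network group is not activated yet; activate it (or retry) before reading.
            return status;
        }
        // HAILO_SUCCESS, or the pipeline status that was stored when the pipeline shut down.
        return status;
    }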
-hailo_status HwWriteElement::execute_deactivate()
+hailo_status OutputVStreamImpl::set_nms_score_threshold(float32_t threshold)
 {
-    // The flush operation will block until all buffers currently in the pipeline will be processed.
-    // We assume that no buffers are sent after the call for deactivate.
-    hailo_status flush_status = m_stream->flush();
-    if (HAILO_STREAM_ABORTED_BY_USER == flush_status) {
-        LOGGER__INFO("Failed flushing input stream {} because stream was aborted", m_stream->to_string());
-        return HAILO_SUCCESS;
-    } else if (HAILO_STREAM_NOT_ACTIVATED == flush_status) {
-        LOGGER__INFO("Failed flushing input stream {} because stream is not activated", m_stream->to_string());
-        return HAILO_SUCCESS;
-    } else if (HAILO_SUCCESS != flush_status) {
-        LOGGER__ERROR("flush has failed in {} with status {}", name(), flush_status);
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    for (auto &elem : m_pipeline) {
+        auto elem_status = elem->set_nms_score_threshold(threshold);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
+        }
     }
+    CHECK_SUCCESS(status, "Unable to set NMS score threshold in {}", name());
 
-    auto abort_status = execute_abort();
-    CHECK(((abort_status == HAILO_SUCCESS) || (abort_status == HAILO_STREAM_NOT_ACTIVATED)), abort_status,
-        "Failed to abort stream in {}", name());
     return HAILO_SUCCESS;
 }
 
-hailo_status HwWriteElement::execute_post_deactivate(bool should_clear_abort)
+hailo_status OutputVStreamImpl::set_nms_iou_threshold(float32_t threshold)
 {
-    if (should_clear_abort) {
-        auto status = execute_clear_abort();
-        CHECK(((status == HAILO_SUCCESS) || (status == HAILO_STREAM_NOT_ACTIVATED)), status,
-            "Failed to clear abort stream in {}", name());
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    for (auto &elem : m_pipeline) {
+        auto elem_status = elem->set_nms_iou_threshold(threshold);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
+        }
     }
+    CHECK_SUCCESS(status, "Unable to set NMS IoU threshold in {}", name());
 
-hailo_status HwWriteElement::execute_clear()
-{
     return HAILO_SUCCESS;
 }
 
-hailo_status HwWriteElement::execute_flush()
+hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class)
 {
-    hailo_status status = m_got_flush_event->wait(m_stream->get_timeout());
-    CHECK_SUCCESS(status);
-
-    status = m_got_flush_event->reset();
-    CHECK_SUCCESS(status);
-
-    return HAILO_SUCCESS;
-}
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    std::shared_ptr<UserBufferQueueElement> user_buffer_queue_element = nullptr;
+    for (auto &elem : m_pipeline) {
+        if (nullptr != std::dynamic_pointer_cast<UserBufferQueueElement>(elem)) {
+            user_buffer_queue_element = std::dynamic_pointer_cast<UserBufferQueueElement>(elem);
+        }
 
-hailo_status HwWriteElement::execute_abort()
-{
-    return m_stream->abort_impl();
-}
+        auto elem_status = elem->set_nms_max_proposals_per_class(max_proposals_per_class);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
 
-hailo_status HwWriteElement::execute_clear_abort()
-{
-    return m_stream->clear_abort_impl();
-}
+            // Update vstream info and frame size
+            m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class;
+            auto set_buffer_size_status = user_buffer_queue_element->set_buffer_pool_buffer_size(HailoRTCommon::get_frame_size(m_vstream_info,
+                m_vstream_params.user_buffer_format));
+            CHECK_SUCCESS(set_buffer_size_status, "Failed to update buffer size in {}", name());
+        }
+    }
+    CHECK_SUCCESS(status, "Unable to set NMS max proposals per class in {}", name());
 
-hailo_status HwWriteElement::execute_wait_for_finish()
-{
     return HAILO_SUCCESS;
 }
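The setters above share one pattern: start from HAILO_INVALID_OPERATION, offer the new value to every pipeline element, and let a single acceptance mark the whole call successful (elements that do not implement the setter simply decline). The pattern in isolation, with stand-in types rather than HailoRT classes:

    #include <memory>
    #include <vector>

    struct Element {
        virtual ~Element() = default;
        virtual bool set_score_threshold(float) { return false; } // default: not supported
    };

    struct NmsPostProcessElement : Element {
        float threshold = 0.0f;
        bool set_score_threshold(float t) override { threshold = t; return true; }
    };

    // Returns true if at least one element accepted the new threshold.
    bool broadcast_score_threshold(std::vector<std::shared_ptr<Element>> &pipeline, float t)
    {
        bool any_accepted = false;
        for (auto &elem : pipeline) {
            any_accepted |= elem->set_score_threshold(t); // keep iterating: several elements may care
        }
        return any_accepted;
    }

    int main()
    {
        std::vector<std::shared_ptr<Element>> pipeline = {
            std::make_shared<Element>(), std::make_shared<NmsPostProcessElement>()
        };
        return broadcast_score_threshold(pipeline, 0.3f) ? 0 : 1;
    }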
-std::string HwWriteElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << " | hw_frame_size: " << m_stream->get_info().hw_frame_size << ")";
-
-    return element_description.str();
-}
-
-Expected> LastAsyncElement::create(const std::string &name,
-    hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status,
-    std::shared_ptr async_pipeline, PipelineDirection pipeline_direction)
+hailo_status OutputVStreamImpl::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size)
 {
-    auto duration_collector = DurationCollector::create(elem_flags);
-    CHECK_EXPECTED(duration_collector);
+    auto status = HAILO_INVALID_OPERATION; // Assuming there is no valid element
+    std::shared_ptr<UserBufferQueueElement> user_buffer_queue_element = nullptr;
+    for (auto &elem : m_pipeline) {
+        if (nullptr != std::dynamic_pointer_cast<UserBufferQueueElement>(elem)) {
+            user_buffer_queue_element = std::dynamic_pointer_cast<UserBufferQueueElement>(elem);
+        }
 
-    auto last_async_elem_ptr = make_shared_nothrow(name,
-        duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline);
-    CHECK_NOT_NULL_AS_EXPECTED(last_async_elem_ptr, HAILO_OUT_OF_HOST_MEMORY);
+        auto elem_status = elem->set_nms_max_accumulated_mask_size(max_accumulated_mask_size);
+        if (HAILO_SUCCESS == elem_status) {
+            status = elem_status; // One successful element is enough to consider the setter successful
 
-    LOGGER__INFO("Created {}", last_async_elem_ptr->name());
+            // Update vstream info and frame size
+            m_vstream_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size;
+            auto set_buffer_size_status = user_buffer_queue_element->set_buffer_pool_buffer_size(HailoRTCommon::get_frame_size(m_vstream_info,
+                m_vstream_params.user_buffer_format));
+            CHECK_SUCCESS(set_buffer_size_status, "Failed to update buffer size in {}", name());
+        }
+    }
+    CHECK_SUCCESS(status, "Unable to set NMS max accumulated mask size in {}", name());
 
-    return last_async_elem_ptr;
-}
-
-Expected> LastAsyncElement::create(const std::string &name,
-    const ElementBuildParams &build_params, std::shared_ptr async_pipeline, PipelineDirection pipeline_direction)
-{
-    return LastAsyncElement::create(name, build_params.elem_stats_flags,
-        build_params.pipeline_status, async_pipeline, pipeline_direction);
+    return HAILO_SUCCESS;
 }
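The buffer-pool resize above exists because max_bboxes_per_class (and likewise max_accumulated_mask_size) feeds directly into the host-side frame size, so user buffers allocated before the change would no longer fit a frame. A rough sketch of that dependency for a per-class NMS layout (the authoritative layout lives in HailoRTCommon::get_frame_size; the formula below is an assumption for illustration only):

    #include <cstdio>
    #include <cstddef>
    #include <cstdint>

    struct NmsShape {
        uint32_t number_of_classes;
        uint32_t max_bboxes_per_class;
    };

    // Assumed per-class layout: a float32 bbox count followed by fixed-size boxes.
    static size_t nms_host_frame_size(const NmsShape &shape, size_t bbox_size)
    {
        const size_t per_class = sizeof(float) + (shape.max_bboxes_per_class * bbox_size);
        return shape.number_of_classes * per_class;
    }

    int main()
    {
        NmsShape shape{80, 100};
        // Doubling max_bboxes_per_class roughly doubles the frame each user buffer must hold.
        std::printf("before: %zu bytes\n", nms_host_frame_size(shape, 5 * sizeof(float)));
        shape.max_bboxes_per_class = 200;
        std::printf("after:  %zu bytes\n", nms_host_frame_size(shape, 5 * sizeof(float)));
        return 0;
    }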
-LastAsyncElement::LastAsyncElement(const std::string &name, DurationCollector &&duration_collector,
-    std::shared_ptr> &&pipeline_status,
-    PipelineDirection pipeline_direction, std::shared_ptr async_pipeline):
-    SinkElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline)
-{}
-
-Expected LastAsyncElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+#ifdef HAILO_SUPPORT_MULTI_PROCESS
+Expected<std::shared_ptr<OutputVStreamClient>> OutputVStreamClient::create(const VStreamIdentifier &&identifier)
 {
-    return make_unexpected(HAILO_INVALID_OPERATION);
-}
+    grpc::ChannelArguments ch_args;
+    ch_args.SetMaxReceiveMessageSize(-1);
+    auto channel = grpc::CreateCustomChannel(hailort::HAILORT_SERVICE_ADDRESS, grpc::InsecureChannelCredentials(), ch_args);
+    CHECK_AS_EXPECTED(channel != nullptr, HAILO_INTERNAL_FAILURE);
 
-hailo_status LastAsyncElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
-{
-    return HAILO_INVALID_OPERATION;
-}
+    auto client = make_unique_nothrow<HailoRtRpcClient>(channel);
+    CHECK_AS_EXPECTED(client != nullptr, HAILO_OUT_OF_HOST_MEMORY);
 
-void LastAsyncElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &/*sink*/)
-{
-    auto exec_done_cb = buffer.get_exec_done_cb();
-    exec_done_cb(buffer.action_status());
-}
+    auto user_buffer_format = client->OutputVStream_get_user_buffer_format(identifier);
+    CHECK_EXPECTED(user_buffer_format);
 
-std::string LastAsyncElement::description() const
-{
-    std::stringstream element_description;
-    element_description << "(" << this->name() << ")";
+    auto info = client->OutputVStream_get_info(identifier);
+    CHECK_EXPECTED(info);
 
-    return element_description.str();
+    return std::shared_ptr<OutputVStreamClient>(new OutputVStreamClient(std::move(client), std::move(identifier),
+        user_buffer_format.release(), info.release()));
 }
 
-hailo_status LastAsyncElement::execute_activate()
-{
-    return HAILO_SUCCESS;
-}
+OutputVStreamClient::OutputVStreamClient(std::unique_ptr<HailoRtRpcClient> client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format,
+    hailo_vstream_info_t &&info) :
+    m_client(std::move(client)), m_identifier(std::move(identifier)), m_user_buffer_format(user_buffer_format), m_info(info) {}
 
-hailo_status LastAsyncElement::execute_wait_for_finish()
+OutputVStreamClient::~OutputVStreamClient()
 {
-    return HAILO_SUCCESS;
+    auto reply = m_client->OutputVStream_release(m_identifier, OsUtils::get_curr_pid());
+    if (reply != HAILO_SUCCESS) {
+        LOGGER__CRITICAL("OutputVStream_release failed!");
+    }
 }
 
-hailo_status LastAsyncElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name)
+hailo_status OutputVStreamClient::read(MemoryView buffer)
 {
-    (void)source_name;
-    return m_sinks[0].prev()->element().enqueue_execution_buffer(mem_view, exec_done, m_sinks[0].prev()->name());
+    return m_client->OutputVStream_read(m_identifier, buffer);
 }
 
-Expected LastAsyncElement::can_push_buffer_upstream(const uint32_t /*source_index*/)
+hailo_status OutputVStreamClient::abort()
 {
-    auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name());
-    CHECK_EXPECTED(source_index);
-    return m_sinks[0].prev()->element().can_push_buffer_upstream(*source_index);
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    auto abort_client = expected_client.release();
+    return abort_client->OutputVStream_abort(m_identifier);
 }
 
-hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t /*source_index*/)
+hailo_status OutputVStreamClient::resume()
 {
-    auto source_index = m_sinks[0].prev()->element().get_source_index_from_source_name(m_sinks[0].prev()->name());
-    CHECK_EXPECTED_AS_STATUS(source_index);
-    return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, *source_index);
+    return m_client->OutputVStream_resume(m_identifier);
 }
 
-Expected LastAsyncElement::can_push_buffer_upstream(const std::string &/*source_name*/)
+hailo_status OutputVStreamClient::stop_and_clear()
 {
-    return m_sinks[0].prev()->element().can_push_buffer_upstream(m_sinks[0].prev()->name());
-}
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    auto stop_and_clear_client = expected_client.release();
 
-hailo_status LastAsyncElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &/*source_name*/)
-{
-    return m_sinks[0].prev()->element().fill_buffer_pool(is_dma_able, num_of_buffers, m_sinks[0].prev()->name());
+    return stop_and_clear_client->OutputVStream_stop_and_clear(m_identifier);
 }
 
-Expected> AsyncHwElement::create(const std::unordered_map
&named_stream_infos, - std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, - std::shared_ptr> pipeline_status, std::shared_ptr net_group, - PipelineDirection pipeline_direction, bool is_last_copy_element, std::shared_ptr async_pipeline) +hailo_status OutputVStreamClient::start_vstream() { - std::vector output_streams_pools; - for (const auto &stream_info_pair : named_stream_infos) { - if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { - auto buffer_pool = BufferPool::create(stream_info_pair.second.hw_frame_size, buffer_pool_size, shutdown_event, elem_flags, vstream_flags, - is_last_copy_element); - CHECK_EXPECTED(buffer_pool); - output_streams_pools.emplace_back(buffer_pool.release()); - } - } - - auto duration_collector = DurationCollector::create(elem_flags); - CHECK_EXPECTED(duration_collector); - - auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); - CHECK_EXPECTED(min_buffer_pool_size); - - auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, std::move(output_streams_pools), name, - duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, - min_buffer_pool_size.release()); - CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); - - LOGGER__INFO("Created {}", elem_ptr->name()); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto start_vstream_client = expected_client.release(); - return elem_ptr; + return start_vstream_client->OutputVStream_start_vstream(m_identifier); } -AsyncHwElement::AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, - std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, std::shared_ptr net_group, - const size_t max_ongoing_transfers) : - PipelineElementInternal(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, async_pipeline), - m_timeout(timeout), - m_pools(std::move(output_streams_pools)), - m_net_group(net_group), - m_max_ongoing_transfers(max_ongoing_transfers) +size_t OutputVStreamClient::get_frame_size() const { - uint32_t sinks_count = 0; - uint32_t sources_count = 0; - for (const auto &stream_info_pair : named_stream_infos) { - if (HAILO_D2H_STREAM == stream_info_pair.second.direction) { - m_sources.emplace_back(*this, name, PipelinePad::Type::SOURCE); - const auto &source_name = m_sources[sources_count++].name(); - m_source_name_to_stream_name[source_name] = stream_info_pair.first; - - m_source_name_to_index[source_name] = static_cast(m_sources.size() - 1); - } else { - m_sinks.emplace_back(*this, name, PipelinePad::Type::SINK); - const auto &sink_name = m_sinks[sinks_count++].name(); - m_sink_name_to_stream_name[sink_name] = stream_info_pair.first; - m_sink_name_to_index[sink_name] = static_cast(m_sinks.size() - 1); - m_sink_has_arrived[sink_name] = false; - } + auto frame_size = m_client->OutputVStream_get_frame_size(m_identifier); + if (!frame_size) { + LOGGER__CRITICAL("OutputVStream_get_frame_size failed with status={}", frame_size.status()); + return 0; } + return frame_size.release(); } -bool AsyncHwElement::has_all_sinks_arrived() +const hailo_vstream_info_t &OutputVStreamClient::get_info() const { - for 
(const auto &current_sink : m_sink_has_arrived) {
-        if (!current_sink.second) {
-            return false;
-        }
-    }
-    return true;
+    return m_info;
 }
 
-// This func overides the regular dataflow of this element and calls all next elements run_push_async directly
-// (normally, the run_push_async of the next elements will be called by the LL async read_done)
-void AsyncHwElement::handle_error_in_hw_async_elem(hailo_status error_status)
+const hailo_format_t &OutputVStreamClient::get_user_buffer_format() const
 {
-    for (auto &name_output_stream_pair : m_source_name_to_index) {
-        auto source_index = name_output_stream_pair.second;
-        assert(source_index < m_pools.size());
-        assert(source_index < m_sources.size());
-        auto expected_buffer = m_pools[source_index]->acquire_buffer_ptr(m_timeout);
-        if (HAILO_SUCCESS == expected_buffer.status()) {
-            expected_buffer->get()->set_action_status(error_status);
-            m_sources[source_index].next()->run_push_async(std::move(*expected_buffer.value()));
-        } else {
-            m_sources[source_index].next()->run_push_async(PipelineBuffer(error_status));
-        }
-    }
-
-    for (const auto &sink : m_sinks) {
-        m_sink_has_arrived[sink.name()] = false;
-    }
-    m_input_buffers.clear();
-
-    return;
+    return m_user_buffer_format;
 }
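get_frame_size(), get_info(), get_user_buffer_format() and name() on the client all follow the same fail-safe convention: their signatures cannot report an RPC failure, so they log at CRITICAL level and return a harmless default (0, a cached struct, or an empty string). Reduced to its skeleton, with a toy Expected substitute (none of the code below is HailoRT API):

    #include <cstdio>
    #include <cstddef>

    // Toy substitute for HailoRT's Expected<T>: a value plus a status code.
    template <typename T>
    struct Expected {
        T value;
        int status; // 0 == success
        explicit operator bool() const { return 0 == status; }
        T release() { return value; }
    };

    // Simulated RPC call that fails.
    static Expected<size_t> rpc_get_frame_size() { return {0, 1}; }

    // Accessors that cannot return a status log the failure and fall back to a safe default.
    static size_t get_frame_size()
    {
        auto frame_size = rpc_get_frame_size();
        if (!frame_size) {
            std::fprintf(stderr, "rpc_get_frame_size failed with status=%d\n", frame_size.status);
            return 0; // callers must tolerate the default
        }
        return frame_size.release();
    }

    int main()
    {
        std::printf("frame size: %zu\n", get_frame_size());
        return 0;
    }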
-void AsyncHwElement::run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink)
+std::string OutputVStreamClient::name() const
 {
-    assert(contains(m_sink_name_to_stream_name, sink.name()));
-
-    std::unique_lock lock(m_mutex);
-    m_sink_has_arrived[sink.name()] = true;
-    m_input_buffers[sink.name()] = std::move(buffer);
-
-    if (has_all_sinks_arrived()) {
-        hailo_status all_buffers_status = HAILO_SUCCESS;
-        for (auto &input_buffer : m_input_buffers) {
-            if (HAILO_SUCCESS != input_buffer.second.action_status()) {
-                all_buffers_status = input_buffer.second.action_status();
-                break; // error from one buffer is enough
-            }
-        }
-
-        if (HAILO_SUCCESS != all_buffers_status) {
-            handle_error_in_hw_async_elem(all_buffers_status);
-            // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-            lock.unlock();
-            m_cv.notify_all();
-        } else {
-            std::unordered_map> source_name_to_output_buffer;
-            for (auto &name_to_index_pair : m_source_name_to_index) {
-                auto expected_buffer = m_pools[name_to_index_pair.second]->acquire_buffer_ptr(m_timeout);
-                if (HAILO_SUCCESS != expected_buffer.status()) {
-                    handle_non_recoverable_async_error(expected_buffer.status());
-                    m_input_buffers.clear();
-                    // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-                    lock.unlock();
-                    m_cv.notify_all();
-                    return;
-                }
-                source_name_to_output_buffer[name_to_index_pair.first] = expected_buffer.release();
-            }
-
-            NamedBuffersCallbacks named_buffers_callbacks;
-
-            for (auto &input_buffer : m_input_buffers) {
-                const auto &stream_name = m_sink_name_to_stream_name.at(input_buffer.first);
-                named_buffers_callbacks.emplace(stream_name, std::make_pair(input_buffer.second.as_view(), input_buffer.second.get_exec_done_cb()));
-            }
-
-            for (auto &output_buffer : source_name_to_output_buffer) {
-                const auto &stream_name = m_source_name_to_stream_name.at(output_buffer.first);
-                named_buffers_callbacks.emplace(stream_name, std::make_pair(output_buffer.second->as_view(),
-                    [this, buffer = output_buffer.second, source_name = output_buffer.first](hailo_status status){
-                        buffer->set_action_status(status);
-                        if (HAILO_SUCCESS == m_pipeline_status->load()) {
-                            assert(contains(m_source_name_to_index, source_name));
-                            // If pipeline_status is not success, someone already handled this error and no reason for this buffer to be pushed
-                            assert(contains(m_source_name_to_index, source_name));
-                            m_sources[m_source_name_to_index[source_name]].next()->run_push_async(std::move(*buffer));
-                        }
-                    }));
-            }
-
-            auto done_cb = [](hailo_status){};
-            auto status = m_net_group->wait_for_callbacks_to_maintain_below_threshold(m_max_ongoing_transfers);
-            if (HAILO_SUCCESS != status ) {
-                handle_non_recoverable_async_error(status);
-            }
-
-            status = m_net_group->infer_async(named_buffers_callbacks, done_cb);
-            if (HAILO_SUCCESS != status ) {
-                handle_non_recoverable_async_error(status);
-            }
-
-            for (const auto &curr_sink : m_sinks) {
-                m_sink_has_arrived[curr_sink.name()] = false;
-            }
-            m_input_buffers.clear();
-
-            // Manual unlocking is done before notifying, to avoid waking up the waiting thread only to block again
-            lock.unlock();
-            m_cv.notify_all();
-        }
-    } else {
-        bool done = m_cv.wait_for(lock, m_timeout, [&](){
-            if (m_pipeline_status->load() != HAILO_SUCCESS) {
-                return true; // so we can exit this flow
-            }
-            return !m_sink_has_arrived[sink.name()];
-        });
-
-        if (!done) {
-            LOGGER__ERROR("Waiting for other threads in AsyncHwElement {} has reached a timeout (timeout={}ms)", name(), m_timeout.count());
-            handle_non_recoverable_async_error(HAILO_TIMEOUT);
-        }
-
-        if (m_pipeline_status->load() == HAILO_STREAM_ABORTED_BY_USER) {
-            lock.unlock();
-            m_cv.notify_all();
-        }
+    auto expected_name = m_client->OutputVStream_name(m_identifier);
+    if (!expected_name) {
+        LOGGER__CRITICAL("OutputVStream_name failed with status={}", expected_name.status());
+        return "";
     }
+    return expected_name.release();
 }
 
-hailo_status AsyncHwElement::run_push(PipelineBuffer &&/*optional*/, const PipelinePad &/*sink*/)
-{
-    return HAILO_INVALID_OPERATION;
-}
-
-hailo_status AsyncHwElement::enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name)
+std::string OutputVStreamClient::network_name() const
 {
-    CHECK(contains(m_source_name_to_index, source_name), HAILO_INTERNAL_FAILURE);
-    auto source_index = m_source_name_to_index[source_name];
-
-    auto status = m_pools[source_index]->enqueue_buffer(mem_view, exec_done);
-    CHECK_SUCCESS(status);
-
-    return HAILO_SUCCESS;
+    auto expected_name = m_client->OutputVStream_network_name(m_identifier);
+    if (!expected_name) {
+        LOGGER__CRITICAL("OutputVStream_network_name failed with status={}", expected_name.status());
+        return "";
+    }
+    return expected_name.release();
 }
 
-hailo_status AsyncHwElement::execute_dequeue_user_buffers(hailo_status error_status)
+const std::map<std::string, AccumulatorPtr> &OutputVStreamClient::get_fps_accumulators() const
 {
-    for (auto pool : m_pools) {
-        auto status = empty_buffer_pool(pool, error_status, m_timeout);
-        CHECK_SUCCESS(status);
-    }
-    return PipelineElement::execute_dequeue_user_buffers(error_status);
+    LOGGER__ERROR("OutputVStream::get_fps_accumulators function is not supported when using multi-process service");
+    return m_fps_accumulators;
 }
 
-Expected AsyncHwElement::can_push_buffer_upstream(const uint32_t source_index)
+const std::map<std::string, AccumulatorPtr> &OutputVStreamClient::get_latency_accumulators() const
 {
-    CHECK_AS_EXPECTED(source_index < m_pools.size(), HAILO_NOT_FOUND);
-    return !m_pools[source_index]->is_full();
+    LOGGER__ERROR("OutputVStream::get_latency_accumulators function is not supported when using multi-process service");
+    return m_latency_accumulators;
 }
 
-hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index)
+const std::map<std::string, std::vector<AccumulatorPtr>> &OutputVStreamClient::get_queue_size_accumulators() const
 {
-    CHECK(source_index < m_pools.size(), HAILO_NOT_FOUND);
-    CHECK_SUCCESS(m_pools[source_index]->allocate_buffers(is_dma_able, num_of_buffers));
-
-    return HAILO_SUCCESS;
+    LOGGER__ERROR("OutputVStream::get_queue_size_accumulators function is not supported when using multi-process service");
+    return m_queue_size_accumulators;
 }
 
-Expected AsyncHwElement::can_push_buffer_upstream(const std::string &source_name)
+AccumulatorPtr OutputVStreamClient::get_pipeline_latency_accumulator() const
 {
-    auto source_index = get_source_index_from_source_name(source_name);
-    CHECK_EXPECTED(source_index);
-    return can_push_buffer_upstream(*source_index);
+    LOGGER__ERROR("OutputVStream::get_pipeline_latency_accumulator function is not supported when using multi-process service");
+    return m_pipeline_latency_accumulator;
 }
 
-hailo_status AsyncHwElement::fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name)
+const std::vector<std::shared_ptr<PipelineElement>> &OutputVStreamClient::get_pipeline() const
 {
-    auto source_index = get_source_index_from_source_name(source_name);
-    CHECK_EXPECTED_AS_STATUS(source_index);
-    return fill_buffer_pool(is_dma_able, num_of_buffers, *source_index);
+    LOGGER__ERROR("OutputVStream::get_pipeline function is not supported when using multi-process service");
+    return m_pipeline;
 }
 
-Expected AsyncHwElement::get_source_index_from_output_stream_name(const std::string &output_stream_name)
+hailo_status OutputVStreamClient::create_client()
 {
-    for (const auto &name_pair : m_source_name_to_stream_name) {
-        if (name_pair.second == output_stream_name) {
-            assert(contains(m_source_name_to_index, name_pair.first));
-            uint32_t ret_val = m_source_name_to_index.at(name_pair.first);
-            return ret_val;
-        }
-    }
-    return make_unexpected(HAILO_NOT_FOUND);
+    auto expected_client = HailoRtRpcClientUtils::create_client();
+    CHECK_EXPECTED_AS_STATUS(expected_client);
+    m_client = expected_client.release();
+    return HAILO_SUCCESS;
 }
 
-Expected AsyncHwElement::get_source_index_from_source_name(const std::string &source_name)
+hailo_status OutputVStreamClient::before_fork()
 {
-    CHECK_AS_EXPECTED(contains(m_source_name_to_index, source_name), HAILO_NOT_FOUND, "couldnt find src '{}'", source_name);
-    auto ret_val = m_source_name_to_index.at(source_name);
-    return ret_val;
+    m_client.reset();
+    return HAILO_SUCCESS;
 }
 
-Expected AsyncHwElement::get_sink_index_from_input_stream_name(const std::string &input_stream_name)
+hailo_status OutputVStreamClient::after_fork_in_parent()
 {
-    for (const auto &name_pair : m_sink_name_to_stream_name) {
-        if (name_pair.second == input_stream_name) {
-            return Expected(m_sink_name_to_index.at(name_pair.first));
-        }
-    }
-    return make_unexpected(HAILO_INVALID_ARGUMENT);
+    return create_client();
 }
 
-Expected AsyncHwElement::run_pull(PipelineBuffer &&/*optional*/, const PipelinePad &/*source*/)
+hailo_status OutputVStreamClient::after_fork_in_child()
 {
-    return make_unexpected(HAILO_NOT_IMPLEMENTED);
+    return create_client();
 }
 
-std::vector AsyncHwElement::execution_pads()
+bool OutputVStreamClient::is_aborted()
 {
-    std::vector result;
-    result.reserve(m_sources.size());
-    for (auto& pad : m_sources) {
-        result.push_back(pad.next());
+    auto is_aborted_exp = m_client->OutputVStream_is_aborted(m_identifier);
+    if (!is_aborted_exp) {
+        LOGGER__CRITICAL("OutputVStream_is_aborted failed with status={}", is_aborted_exp.status());
+        return true;
     }
-    return result;
+    return
is_aborted_exp.release(); } -hailo_status AsyncHwElement::execute_terminate(hailo_status error_status) +hailo_status OutputVStreamClient::set_nms_score_threshold(float32_t threshold) { - if (m_is_terminated) { - return HAILO_SUCCESS; - } - - if (!m_is_terminating_element) { - { - // There is a case where the other thread is halted (via context switch) before the wait_for() function, - // then we call notify_all() here, and then the wait_for() is called - resulting in a timeout. - // notify_all() only works on threads which are already waiting, so that's why we acquire the lock here. - std::unique_lock lock(m_mutex); - } - m_cv.notify_all(); - } + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - // Checking success of shutdown is best effort (terminate should be called even if shutdown fails) - auto shutdown_status = m_net_group->shutdown(); - auto wait_for_callbacks_finish_status = m_net_group->wait_for_callbacks_finish(); - auto terminate_status = PipelineElement::execute_terminate(error_status); - CHECK_SUCCESS(shutdown_status); - CHECK_SUCCESS(wait_for_callbacks_finish_status); - CHECK_SUCCESS(terminate_status); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_score_threshold(m_identifier, threshold)); return HAILO_SUCCESS; } -Expected> CopyBufferElement::create(const std::string &name, - std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) +hailo_status OutputVStreamClient::set_nms_iou_threshold(float32_t threshold) { - auto duration_collector = DurationCollector::create(HAILO_PIPELINE_ELEM_STATS_NONE); - CHECK_EXPECTED(duration_collector); - auto elem_ptr = make_shared_nothrow(name, duration_collector.release(), std::move(pipeline_status), - timeout, pipeline_direction, async_pipeline); - CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - LOGGER__INFO("Created {}", elem_ptr->name()); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_iou_threshold(m_identifier, threshold)); - return elem_ptr; + return HAILO_SUCCESS; } -CopyBufferElement::CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : - FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, nullptr, timeout, async_pipeline) -{} - -PipelinePad &CopyBufferElement::next_pad() +hailo_status OutputVStreamClient::set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) { - if (PipelineDirection::PUSH == m_pipeline_direction){ - return *m_sources[0].next(); - } - return *m_sinks[0].prev(); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); + + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_proposals_per_class(m_identifier, max_proposals_per_class)); + m_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; + + return HAILO_SUCCESS; } -Expected CopyBufferElement::action(PipelineBuffer &&input, PipelineBuffer &&optional) +hailo_status OutputVStreamClient::set_nms_max_accumulated_mask_size(uint32_t 
max_accumulated_mask_size) { - CHECK_AS_EXPECTED(optional, HAILO_INVALID_ARGUMENT, "Optional buffer must be passed to CopyBufferElement!"); + auto expected_client = HailoRtRpcClientUtils::create_client(); + CHECK_EXPECTED_AS_STATUS(expected_client); + auto vstream_client = expected_client.release(); - CHECK_AS_EXPECTED(optional.size() == input.size(), HAILO_INVALID_ARGUMENT, "Optional buffer size does not equal to the input buffer size!"); - memcpy(optional.data(), input.data(), optional.size()); + CHECK_SUCCESS(vstream_client->OutputVStream_set_nms_max_accumulated_mask_size(m_identifier, max_accumulated_mask_size)); + m_info.nms_shape.max_accumulated_mask_size = max_accumulated_mask_size; - return std::move(optional); + return HAILO_SUCCESS; } +#endif // HAILO_SUPPORT_MULTI_PROCESS + Expected, std::vector>> VStreamsBuilder::create_vstreams( ConfiguredNetworkGroup &net_group, bool /*unused*/, hailo_format_type_t format_type, const std::string &network_name) @@ -3273,15 +1472,6 @@ Expected, std::vector>> VStre expected_all_inputs.release(), expected_all_outputs.release()); } -static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info, - const hailo_vstream_params_t &vstream_params) -{ - auto local_vstream_params = vstream_params; - local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format, - stream_info.format); - return local_vstream_params; -} - Expected> VStreamsBuilder::create_input_vstreams(ConfiguredNetworkGroup &net_group, const std::map &inputs_params) { @@ -3294,1201 +1484,4 @@ Expected> VStreamsBuilder::create_output_vstreams(Con return net_group.create_output_vstreams(outputs_params); } -Expected> VStreamsBuilderUtils::create_inputs( - std::vector> input_streams, const hailo_vstream_info_t &vstream_info, - const hailo_vstream_params_t &vstream_params) -{ - CHECK_AS_EXPECTED(!input_streams.empty(), HAILO_INVALID_ARGUMENT, "input streams can't be empty"); - // if input streams has more than 1 value, it will be handled by handle_pix_buffer_splitter_flow. 
For all other purposes, - // assuming there is only 1 stream is valid - std::shared_ptr input_stream = input_streams.front(); - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!input_stream->is_scheduled()) { - core_op_activated_event = input_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto user_timeout = std::chrono::milliseconds(vstream_params.timeout_ms); - - if (input_streams.size() > 1) { - CHECK_SUCCESS_AS_EXPECTED(handle_pix_buffer_splitter_flow(input_streams, vstream_info, - std::move(elements), vstreams, vstream_params, shutdown_event, pipeline_status, core_op_activated_event, - pipeline_latency_accumulator.value())); - } else { - auto hw_write_elem = HwWriteElement::create(input_stream, - PipelineObject::create_element_name("HwWriteElement", input_stream->name(), input_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status); - CHECK_EXPECTED(hw_write_elem); - elements.insert(elements.begin(), hw_write_elem.value()); - - auto should_transform = InputTransformContext::is_transformation_required(input_stream->get_info().shape, - vstream_params.user_buffer_format, input_stream->get_info().hw_shape, input_stream->get_info().format, - input_stream->get_quant_infos()); - CHECK_EXPECTED(should_transform); - - if (should_transform.value()) { - std::shared_ptr elem_after_post_infer = hw_write_elem.value(); - auto queue_elem = PushQueueElement::create( - PipelineObject::create_element_name("PushQueueElement", input_stream->get_info().name, input_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(queue_elem); - elements.insert(elements.begin(), queue_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), hw_write_elem.value())); - - auto pre_infer_elem = PreInferElement::create(input_stream->get_info().shape, vstream_params.user_buffer_format, - input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream->get_quant_infos(), - PipelineObject::create_element_name("PreInferElement", input_stream->get_info().name, input_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(pre_infer_elem); - elements.insert(elements.begin(), pre_infer_elem.value()); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); - - input_stream->set_timeout(user_timeout); - auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, pre_infer_elem.release(), hw_write_elem.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } else { - input_stream->set_timeout(user_timeout); - auto 
vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } - } - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, - NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos) -{ - std::vector> elements; - std::vector vstreams; - - if (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_stream->get_info().format.flags)) - { - LOGGER__WARNING("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version"); - } - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - if (output_stream->get_info().is_mux) { - hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(), - shutdown_event, pipeline_status, output_vstream_infos); - CHECK_SUCCESS_AS_EXPECTED(status); - } else { - auto vstream_info = output_vstream_infos.find(output_stream->name()); - CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", output_stream->name()); - assert(1 == vstreams_params_map.size()); - auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto should_transform = OutputTransformContext::is_transformation_required(output_stream->get_info().hw_shape, - output_stream->get_info().format, 
output_stream->get_info().shape, - vstream_params.user_buffer_format, output_stream->get_quant_infos()); - CHECK_EXPECTED(should_transform); - - if (should_transform.value()) { - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } else { - output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, hw_read_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - } - } - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, - hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_AS_EXPECTED(shutdown_event_exp, HAILO_OUT_OF_HOST_MEMORY); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - vstream_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, - iou_op_metadata->type()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - vstream_params.queue_size, vstream_params.pipeline_elements_stats_flags, vstream_params.vstream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - auto hw_read_queue_element = 
add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - - auto pre_nms_convert_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_nms_convert", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_nms_convert_queue_element.value())); - - auto nms_to_detections_element = add_nms_to_detections_convert_element(output_stream, pipeline_status, elements, "NmsFormatToDetectionsElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(nms_to_detections_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_nms_convert_queue_element.value(), nms_to_detections_element.value())); - - auto pre_remove_overlapping_bboxes_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_bboxes_removing", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(nms_to_detections_element.value(), pre_remove_overlapping_bboxes_element_queue_element.value())); - - auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(output_stream, pipeline_status, elements, "RemoveOverlappingBboxesElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(remove_overlapping_bboxes_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_remove_overlapping_bboxes_element_queue_element.value(), remove_overlapping_bboxes_element.value())); - - auto pre_fill_nms_format_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_fill_nms_format", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(remove_overlapping_bboxes_element.value(), pre_fill_nms_format_element_queue_element.value())); - - auto fill_nms_format_element = add_fill_nms_format_element(output_stream, pipeline_status, elements, "FillNmsFormatElement", - vstream_params, iou_op_metadata, vstream_params.queue_size, std::chrono::milliseconds(HAILO_INFINITE), vstream_params.vstream_stats_flags, shutdown_event); - CHECK_EXPECTED(fill_nms_format_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_fill_nms_format_element_queue_element.value(), fill_nms_format_element.value())); - - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(fill_nms_format_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - - auto output_vstream_info = 
iou_op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - - auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto &curr_vstream : vstreams) { - LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - assert(1 == vstreams_params_map.size()); - auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format; - auto vstream_params = vstreams_params_map.begin()->second; - vstream_params.user_buffer_format = net_flow::SoftmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, - "PostInferElement", vstream_params, shutdown_event); - 
CHECK_EXPECTED(post_infer_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - - auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_pre_softmax", - shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value())); - - auto softmax_element = add_softmax_element(output_stream, pipeline_status, elements, "SoftmaxPostProcessElement", - vstream_params, softmax_op_metadata, buffer_pool_size, std::chrono::milliseconds(HAILO_INFINITE), hw_read_stream_stats_flags, shutdown_event); - CHECK_EXPECTED(softmax_element); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value())); - auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement", shutdown_event, vstream_params); - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value())); - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - - auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto &curr_vstream : vstreams) { - LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); - } - - return vstreams; -} - -InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr input_vstream) -{ - return InputVStream(std::move(input_vstream)); -} - -OutputVStream VStreamsBuilderUtils::create_output(std::shared_ptr output_vstream) -{ - return OutputVStream(std::move(output_vstream)); -} - -static bool are_formats_equal(const hailo_format_t &format1, const hailo_format_t &format2) { - return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type)); -} - -Expected> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, - OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, - const std::unordered_map &post_process_ops_metadata, - const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map) -{ - auto first_stream_info = output_streams[0]->get_info(); - if ((HailoRTCommon::is_nms(first_stream_info)) && (first_stream_info.nms_info.is_defused)) { - // Case defuse NMS - return create_output_nms(output_streams, vstream_params, output_vstream_infos_map); - } else if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { - // Case post-process on host - auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); - auto &op_metadata = post_process_ops_metadata.at(op_name); - switch (op_metadata->type()) { - case net_flow::OperationType::YOLOX: - case net_flow::OperationType::YOLOV8: - case net_flow::OperationType::SSD: - case net_flow::OperationType::YOLOV5: - case net_flow::OperationType::YOLOV5SEG: - case net_flow::OperationType::IOU: - { - assert(1 <= op_metadata->outputs_metadata().size()); - auto updated_outputs_metadata = op_metadata->outputs_metadata(); - 
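The switch below picks the concrete post-process op from op_metadata->type(), downcasting the shared metadata pointer to its concrete class before calling the matching create(). A condensed, compilable sketch of that dispatch pattern, with two hypothetical op types standing in for the full YOLOX/YOLOV8/YOLOV5/SSD list:

#include <cassert>
#include <iostream>
#include <memory>

enum class OperationType { YOLOV5, SSD };

struct OpMetadata {
    virtual ~OpMetadata() = default;
    virtual OperationType type() const = 0;
};
struct Yolov5OpMetadata : OpMetadata {
    OperationType type() const override { return OperationType::YOLOV5; }
};
struct SSDOpMetadata : OpMetadata {
    OperationType type() const override { return OperationType::SSD; }
};

struct Op { virtual ~Op() = default; virtual const char *name() const = 0; };
struct Yolov5Op : Op { const char *name() const override { return "YOLOv5PostProcessOp"; } };
struct SSDOp : Op { const char *name() const override { return "SSDPostProcessOp"; } };

std::shared_ptr<Op> create_op(const std::shared_ptr<OpMetadata> &metadata)
{
    switch (metadata->type()) {
    case OperationType::YOLOV5: {
        // The downcast is safe because type() identifies the dynamic type.
        auto yolo_metadata = std::dynamic_pointer_cast<Yolov5OpMetadata>(metadata);
        assert(nullptr != yolo_metadata);
        return std::make_shared<Yolov5Op>();
    }
    case OperationType::SSD: {
        auto ssd_metadata = std::dynamic_pointer_cast<SSDOpMetadata>(metadata);
        assert(nullptr != ssd_metadata);
        return std::make_shared<SSDOp>();
    }
    }
    return nullptr; // unreachable with the enum values above
}

int main()
{
    std::shared_ptr<OpMetadata> metadata = std::make_shared<Yolov5OpMetadata>();
    std::cout << create_op(metadata)->name() << "\n"; // YOLOv5PostProcessOp
    return 0;
}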
updated_outputs_metadata.begin()->second.format = - net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, op_metadata->type()); - op_metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(op_metadata->validate_format_info()); - - std::shared_ptr op; - switch (op_metadata->type()) { - case (net_flow::OperationType::YOLOX): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV8): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV5): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::YOLOV5SEG): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::Yolov5SegPostProcess::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::SSD): - { - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - auto op_expected = net_flow::SSDPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - op = op_expected.release(); - break; - } - case (net_flow::OperationType::IOU): - { - return create_output_post_process_iou(output_streams[0], vstream_params, op_metadata); - } - default: - break; - } - - return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, op); - } - - case net_flow::OperationType::ARGMAX: - { - assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - auto output_vstream_info = op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); - } - - case net_flow::OperationType::SOFTMAX: - { - assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - auto output_vstream_info = op_metadata->get_output_vstream_info(); - CHECK_EXPECTED(output_vstream_info); - return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); - } - - default: - LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_name); - return make_unexpected(HAILO_INVALID_OPERATION); - } - } else { - // All other cases - 
assert(output_streams.size() == 1); - NameToVStreamParamsMap name_to_vstream_params_map; - for (auto &output_stream : all_output_streams) { - if (output_stream.first->get_info().name == output_streams[0]->get_info().name) { - for (auto &vstream : output_stream.second) { - name_to_vstream_params_map.insert(vstream); - } - } - } - return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map); - } -} - -Expected> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos) -{ - for (const auto &out_stream : output_streams) { - CHECK_AS_EXPECTED(are_formats_equal(output_streams[0]->get_info().format, out_stream->get_info().format), - HAILO_INVALID_ARGUMENT, "All nms streams of the same virtual output must have the same format"); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - std::vector> elements; - std::vector vstreams; - - hailo_status status = add_nms_fuse(output_streams, vstreams_params, elements, vstreams, shutdown_event, - pipeline_status, output_vstream_infos); - CHECK_SUCCESS_AS_EXPECTED(status); - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op) -{ - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - std::vector> elements; - std::vector vstreams; - - hailo_status status = add_nms_post_process(output_streams, vstreams_params, elements, vstreams, shutdown_event, - pipeline_status, output_vstream_infos, nms_op); - CHECK_SUCCESS_AS_EXPECTED(status); - - for (const auto &vstream : vstreams) { - LOGGER__INFO("{}", vstream.get_pipeline_description()); - } - - return vstreams; -} - -Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, - const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags) -{ - auto hw_read_elem = HwReadElement::create(output_stream, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - HAILO_INFINITE_TIMEOUT, buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED(hw_read_elem); - elements.push_back(hw_read_elem.value()); - return hw_read_elem; -} - -Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) -{ - auto pull_queue_elem = 
PullQueueElement::create( - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(pull_queue_elem); - elements.push_back(pull_queue_elem.value()); - return pull_queue_elem; -} - -Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - // Updating metadata according to user request. TODO: HRT-9737 - auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto metadata = std::dynamic_pointer_cast(argmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info()); - // Updating metadata according to use request. TODO: HRT-9737 - End - - auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - auto argmax_op = op_expected.release(); - - auto argmax_element = ArgmaxPostProcessElement::create(argmax_op, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, buffer_pool_size, timeout, vstream_flags, shutdown_event); - CHECK_EXPECTED(argmax_element); - elements.push_back(argmax_element.value()); - return argmax_element; -} - -Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - // Updating metadata according to user request. TODO: HRT-9737 - // Currently softmax only supports inputs to be float32 and order NHWC or NC - auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata(); - updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata(); - updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format; - auto metadata = std::dynamic_pointer_cast(softmax_op_metadata); - assert(nullptr != metadata); - metadata->set_outputs_metadata(updated_outputs_metadata); - metadata->set_inputs_metadata(updated_inputs_metadata); - CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info()); - // Updating metadata according to use request. 
TODO: HRT-9737 - End - - auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); - CHECK_EXPECTED(op_expected); - auto softmax_op = op_expected.release(); - auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, buffer_pool_size, timeout, vstream_flags, shutdown_event); - CHECK_EXPECTED(softmax_element); - elements.push_back(softmax_element.value()); - return softmax_element; -} - -Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(nms_to_detections_element); - elements.push_back(nms_to_detections_element.value()); - return nms_to_detections_element; -} - -Expected> VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(remove_overlapping_bboxes_element); - elements.push_back(remove_overlapping_bboxes_element.value()); - return remove_overlapping_bboxes_element; -} - -Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event) -{ - auto metadata = std::dynamic_pointer_cast(op_metadata); - assert(nullptr != metadata); - - auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_info(), vstream_params.user_buffer_format, metadata->nms_config(), - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status, timeout, vstream_flags, shutdown_event, buffer_pool_size); - CHECK_EXPECTED(fill_nms_format_element); - 
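All of the add_*_element helpers here share one contract: construct the element, append it to the pipeline's element list, and return it so the caller can link its pads. A minimal sketch of that create/register/return pattern, using a bare, hypothetical PipelineElement stand-in instead of the real Expected-returning factories:

#include <iostream>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct PipelineElement {
    explicit PipelineElement(std::string name) : name(std::move(name)) {}
    std::string name;
};

// Construct the element, register it with the pipeline (shared ownership),
// and hand it back so the caller can wire it to its neighbors.
template <typename Element, typename... Args>
std::shared_ptr<Element> add_element(std::vector<std::shared_ptr<PipelineElement>> &elements,
    Args &&...args)
{
    auto element = std::make_shared<Element>(std::forward<Args>(args)...);
    elements.push_back(element); // the pipeline keeps every element alive
    return element;              // returned for pad linking
}

int main()
{
    std::vector<std::shared_ptr<PipelineElement>> elements;
    auto first = add_element<PipelineElement>(elements, "HwReadElement");
    auto second = add_element<PipelineElement>(elements, "PullQueueElement");
    std::cout << elements.size() << " elements registered\n"; // prints 2
    (void)first; (void)second;
    return 0;
}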
elements.push_back(fill_nms_format_element.value()); - return fill_nms_format_element; -} - -Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params) -{ - auto post_argmax_queue_element = UserBufferQueueElement::create( - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED(post_argmax_queue_element); - elements.push_back(post_argmax_queue_element.value()); - return post_argmax_queue_element; -} - -Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event) -{ - auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, - output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_quant_infos(), output_stream->get_info().nms_info, - PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - vstream_params, pipeline_status, shutdown_event); - CHECK_EXPECTED(post_infer_element); - elements.push_back(post_infer_element.value()); - return post_infer_element; -} - -Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) -{ - std::vector> elements; - std::vector vstreams; - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - auto shutdown_event_exp = Event::create_shared(Event::State::not_signalled); - CHECK_EXPECTED(shutdown_event_exp); - auto shutdown_event = shutdown_event_exp.release(); - - auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); - CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); - - assert(!vstreams_params_map.empty()); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
- hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - // TODO (HRT-4522): Support this measurement - CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, - "Pipeline FPS statistics measurement is not implemented"); - - auto hw_read_element = add_hw_read_element(output_stream, pipeline_status, elements, "HwReadElement", shutdown_event, - buffer_pool_size, hw_read_element_stats_flags, hw_read_stream_stats_flags); - CHECK_EXPECTED(hw_read_element); - - assert(1 == vstreams_params_map.size()); - auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; - auto vstream_params = vstreams_params_map.begin()->second; - vstream_params.user_buffer_format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); - - auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQueueElement_hw_read", - shutdown_event, vstream_params); - CHECK_EXPECTED(hw_read_queue_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); - - auto argmax_element = add_argmax_element(output_stream, pipeline_status, elements, "ArgmaxPostProcessElement", - vstream_params, argmax_op_metadata, buffer_pool_size, std::chrono::milliseconds(HAILO_INFINITE), hw_read_stream_stats_flags, shutdown_event); - CHECK_EXPECTED(argmax_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value())); - - auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, - "UserBufferQueueElement_post_argmax", shutdown_event, vstream_params); - CHECK_EXPECTED(post_argmax_queue_element); - - CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value())); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED(pipeline_latency_accumulator); - - output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); - auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED(vstream); - vstreams.emplace_back(vstream.release()); - - for (const auto ¤t_vstream : vstreams) { - LOGGER__INFO("{}", current_vstream.get_pipeline_description()); - } - - return vstreams; -} - -hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams, - const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, - std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, - AccumulatorPtr accumalator) -{ - // sorting the streams based on their 
plane index -> we count on order to know which plane belongs to which stream - auto compartor = [](std::shared_ptr a, std::shared_ptr b) { - return a->get_layer_info().plane_index < b->get_layer_info().plane_index; - }; - std::sort(streams.begin(), streams.end(), compartor); - - auto duration_collector_expected = DurationCollector::create(vstream_params.pipeline_elements_stats_flags); - CHECK_EXPECTED_AS_STATUS(duration_collector_expected); - - auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferElement", - vstream_info.name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(), - pipeline_status, vstream_info.format.order); - CHECK_EXPECTED_AS_STATUS(planes_splitter); - base_elements.push_back(planes_splitter.value()); - - uint32_t stream_number = 0; - - for (const auto &stream : streams){ - auto hw_write_elem = HwWriteElement::create(stream, - PipelineObject::create_element_name("HwWriteElement", stream->name(), stream->get_info().index), - vstream_params.pipeline_elements_stats_flags, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_write_elem); - base_elements.insert(base_elements.begin(), hw_write_elem.value()); - - auto &stream_info = stream->get_info(); - auto &src_image_shape = stream_info.shape; - auto &dst_image_shape = stream_info.hw_shape; - auto &dst_format = stream_info.format; - auto src_format = vstream_params.user_buffer_format; - /* the format order of each plane (stream) is determined by the stream's order. - type and flags are determined by the vstream params */ - src_format.order = dst_format.order; - auto quant_infos = std::vector{stream_info.quant_info}; - - auto should_transform_expected = InputTransformContext::is_transformation_required(src_image_shape, src_format, - dst_image_shape, dst_format, quant_infos); - CHECK_EXPECTED_AS_STATUS(should_transform_expected); - - if(should_transform_expected.value()){ - auto pre_infer_elem = PreInferElement::create(src_image_shape, src_format, - dst_image_shape, dst_format, quant_infos, PipelineObject::create_element_name( "PreInferElement", - stream->get_info().name, stream->get_info().index), vstream_params, shutdown_event, pipeline_status); - - CHECK_EXPECTED_AS_STATUS(pre_infer_elem); - base_elements.push_back(pre_infer_elem.value()); - - auto queue_elem = PushQueueElement::create( - PipelineObject::create_element_name("PushQueueElement", stream_info.name, stream_info.index), - vstream_params, shutdown_event, pipeline_status); - - CHECK_EXPECTED_AS_STATUS(queue_elem); - base_elements.push_back((queue_elem.value())); - - CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), pre_infer_elem.value(), stream_number, 0)); - CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), *hw_write_elem)); - } else { - CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), *hw_write_elem, stream_number, 0)); - - } - stream_number++; - } - - auto vstream = InputVStream::create(vstream_info, { vstream_info.quant_info }, vstream_params, planes_splitter.value(), - nullptr, std::move(base_elements), std::move(pipeline_status), shutdown_event, - core_op_activated_event, accumalator); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, - std::vector> &&base_elements, std::vector &vstreams, - 
std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos) -{ - auto expected_demuxer = OutputDemuxer::create(*output_stream); - CHECK_EXPECTED_AS_STATUS(expected_demuxer); - - std::shared_ptr demuxer_ptr = expected_demuxer.release(); - CHECK(nullptr != demuxer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - auto status = output_stream->set_timeout(HAILO_INFINITE_TIMEOUT); - CHECK_SUCCESS(status); - - // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the - // pipeline_elements_stats_flags for the demux_elem as bitwise or of all the flags. - hailo_pipeline_elem_stats_flags_t demux_elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; - hailo_vstream_stats_flags_t demux_vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; - size_t buffer_pool_size = 0; - for (const auto &elem_name_params : vstreams_params_map) { - demux_elem_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; - demux_vstream_stats_flags |= elem_name_params.second.vstream_stats_flags; - buffer_pool_size += elem_name_params.second.queue_size; - } - - auto demux_elem = TransformDemuxElement::create(demuxer_ptr, - PipelineObject::create_element_name("TransformDemuxElement", output_stream->name(), output_stream->get_info().index), - std::chrono::milliseconds(HAILO_INFINITE), buffer_pool_size, demux_elem_stats_flags, demux_vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(demux_elem); - base_elements.push_back(demux_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem, demux_elem.value())); - - EventPtr core_op_activated_event = nullptr; - if (!output_stream->is_scheduled()) { - core_op_activated_event = output_stream->get_core_op_activated_event(); - } - - uint32_t i = 0; - for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { - auto name_params_pair = vstreams_params_map.find(edge_info.name); - CHECK(name_params_pair != vstreams_params_map.end(), HAILO_NOT_FOUND, - "Failed to find vstreams params of edge {}", edge_info.name); - - const auto vstream_info = output_vstream_infos.find(edge_info.name); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", edge_info.name); - - const auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), name_params_pair->second); - - // For each mux vstream, we create a copy of the previous elements - auto current_vstream_elements = base_elements; - - // For muxed VStreams we use the same pipeline_status for all - auto pipeline_status_copy = pipeline_status; - auto demux_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_demux", edge_info.name, edge_info.index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(demux_queue_elem); - current_vstream_elements.push_back(demux_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_elem.value(), demux_queue_elem.value(), i, 0)); - - CHECK_SUCCESS(demux_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT)); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, - edge_info.format, edge_info.shape, vstream_params.user_buffer_format, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) - 
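Each demuxed edge is checked with is_transformation_required before choosing between a PostInferElement chain and a plain CopyBufferElement. Roughly, a transformation is needed whenever reading the raw hardware buffer as-is would not match what the user asked for. A simplified standalone illustration with hypothetical stand-in types; the real check also considers NMS info and per-feature quant vectors:

#include <cstdint>
#include <iostream>

struct Shape { uint32_t height, width, features; };
struct Format { int order; int type; };
struct QuantInfo { float scale; float zero_point; };

static bool operator==(const Shape &a, const Shape &b)
{
    return (a.height == b.height) && (a.width == b.width) && (a.features == b.features);
}

// A transformation is required on a shape mismatch (e.g. padded hw_shape),
// a format order/type mismatch, or a non-identity quantization.
bool is_transformation_required(const Shape &src_shape, const Format &src_format,
    const Shape &dst_shape, const Format &dst_format, const QuantInfo &quant)
{
    // Exact float compare is fine here: identity quant is stored exactly.
    const bool identity_quant = (1.0f == quant.scale) && (0.0f == quant.zero_point);
    return !(src_shape == dst_shape) || (src_format.order != dst_format.order) ||
        (src_format.type != dst_format.type) || !identity_quant;
}

int main()
{
    Shape hw{8, 8, 4}, user{8, 8, 3}; // hw features padded to 4
    Format f{0, 0};
    std::cout << std::boolalpha
              << is_transformation_required(hw, f, user, f, {1.0f, 0.0f}) << "\n"; // true
    return 0;
}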
CHECK_EXPECTED_AS_STATUS(should_transform); - - if (should_transform.value()) { - auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format, - edge_info.shape, vstream_params.user_buffer_format, { edge_info.quant_info }, edge_info.nms_info, // TODO: Get quant vector (HRT-11077) - PipelineObject::create_element_name("PostInferElement", edge_info.name, edge_info.index), - vstream_params, pipeline_status, shutdown_event); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - current_vstream_elements.push_back(post_infer_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value())); - - auto post_infer_queue_elem = UserBufferQueueElement::create( - PipelineObject::create_element_name("UserBufferQueueElement_post_infer", edge_info.name, edge_info.index), - vstream_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); - current_vstream_elements.push_back(post_infer_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); - - // TODO: Replace output_stream->get_quant_infos() with mux quant info - auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, post_infer_queue_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) - std::move(pipeline_status_copy), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } else { - // TODO: HRT-4179 - auto user_copy_elem = CopyBufferElement::create( - PipelineObject::create_element_name("CopyBufferElement", edge_info.name, edge_info.index), - pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms)); - CHECK_EXPECTED_AS_STATUS(user_copy_elem); - current_vstream_elements.push_back(user_copy_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), user_copy_elem.value())); - - // TODO: Replace output_stream->get_quant_infos() with mux quant info - auto vstream = OutputVStream::create(vstream_info->second, { edge_info.quant_info }, vstream_params, user_copy_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) - std::move(pipeline_status_copy), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } - i++; - } - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos) -{ - std::vector nms_infos; - nms_infos.reserve(output_streams.size()); - for (const auto &out_stream : output_streams) { - CHECK(out_stream->get_info().nms_info.defuse_info.class_group_index <= output_streams.size(), - HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); - nms_infos.emplace_back(out_stream->get_info().nms_info); - } - - // To get the fused layer name and src stream format, we use the stream info of one of the defuses - auto first_defused_stream_info = output_streams[0]->get_info(); - auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; - auto src_stream_format = first_defused_stream_info.format; - - auto vstream_info = 
output_vstream_infos.find(fused_layer_name); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}. Could be due to use of old HEF. Try to re-compile network with newer Dataflow Compiler version", fused_layer_name); - - vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params); - auto nms_elem = NmsMuxElement::create(nms_infos, - PipelineObject::create_element_name("NmsMuxElement", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_elem); - auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); - - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); - - auto hw_read_elem = HwReadElement::create(output_streams[i], - PipelineObject::create_element_name("HwReadElement", curr_stream_info.name, curr_stream_info.index), - HAILO_INFINITE_TIMEOUT, vstreams_params.queue_size, vstreams_params.pipeline_elements_stats_flags, - vstreams_params.vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_read_elem); - elements.push_back(hw_read_elem.value()); - - auto nms_source_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); - elements.push_back(nms_source_queue_elem.value()); - nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); - } - elements.push_back(nms_elem.value()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - - auto should_transform = OutputTransformContext::is_transformation_required({}, src_stream_format, {}, - vstreams_params.user_buffer_format, std::vector{vstream_info->second.quant_info}); // TODO: Get quant vector (HRT-11078) - CHECK_EXPECTED_AS_STATUS(should_transform); - - EventPtr core_op_activated_event = nullptr; - if (!output_streams[0]->is_scheduled()) { - core_op_activated_event = output_streams[0]->get_core_op_activated_event(); - } - - if (should_transform.value()) { - auto nms_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_queue_elem); - nms_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - elements.push_back(nms_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), nms_queue_elem.value())); - - auto post_infer_elem = PostInferElement::create({}, src_stream_format, - {}, vstreams_params.user_buffer_format, { vstream_info->second.quant_info }, fused_layer_nms_info, // TODO: Get quant vector (HRT-11078) - PipelineObject::create_element_name("PostInferElement", fused_layer_name, 0), vstreams_params, pipeline_status, - shutdown_event); - CHECK_EXPECTED_AS_STATUS(post_infer_elem); - - elements.push_back(post_infer_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(nms_queue_elem.value(), post_infer_elem.value())); - - auto post_infer_queue_elem = UserBufferQueueElement::create( 
- PipelineObject::create_element_name("UserBufferQueueElement_post_infer", fused_layer_name, 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); - elements.push_back(post_infer_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); - - // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, post_infer_queue_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } else { - // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - } - - return HAILO_SUCCESS; -} - -hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op) -{ - auto first_stream_info = output_streams[0]->get_info(); - vstreams_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type( - vstreams_params.user_buffer_format, nms_op->metadata()->type()); - CHECK(vstreams_params.user_buffer_format.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, - "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); - CHECK(HailoRTCommon::is_nms(vstreams_params.user_buffer_format.order), HAILO_INVALID_ARGUMENT, - "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); - - std::unordered_map inputs_metadata; - std::unordered_map outputs_metadata; - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - net_flow::BufferMetaData input_metadata = { - curr_stream_info.shape, - curr_stream_info.hw_shape, - curr_stream_info.format, - curr_stream_info.quant_info - }; - inputs_metadata.insert({curr_stream_info.name, input_metadata}); - } - - const auto &output_pads = nms_op->outputs_metadata(); - assert(output_pads.size() == 1); - auto vstream_info = output_vstream_infos.find(output_pads.begin()->first); - CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, - "Failed to find vstream info of {}", nms_op->metadata()->get_name()); - net_flow::BufferMetaData output_metadata = { - vstream_info->second.shape, - vstream_info->second.shape, - vstream_info->second.format, - vstream_info->second.quant_info - }; - outputs_metadata.insert({vstream_info->first, output_metadata}); - - auto op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); - assert(nullptr != op_metadata); - auto nms_elem = NmsPostProcessMuxElement::create(nms_op, - 
PipelineObject::create_element_name("NmsPostProcessMuxElement", nms_op->get_name(), 0), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_elem); - - hailo_format_t nms_src_format; - nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; - nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; - nms_src_format.type = first_stream_info.format.type; - - for (uint32_t i = 0; i < output_streams.size(); ++i) { - const auto &curr_stream_info = output_streams[i]->get_info(); - output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); - - auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format, - curr_stream_info.hw_shape, nms_src_format, output_streams[i]->get_quant_infos()); - CHECK_EXPECTED_AS_STATUS(should_transform); - - CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name); - - auto hw_read_elem = HwReadElement::create(output_streams[i], - PipelineObject::create_element_name("HwReadElement", curr_stream_info.name, curr_stream_info.index), - HAILO_INFINITE_TIMEOUT, vstreams_params.queue_size, vstreams_params.pipeline_elements_stats_flags, - vstreams_params.vstream_stats_flags, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(hw_read_elem); - elements.push_back(hw_read_elem.value()); - - auto nms_source_queue_elem = PullQueueElement::create( - PipelineObject::create_element_name("PullQueueElement_nms_source", curr_stream_info.name, curr_stream_info.index), - vstreams_params, shutdown_event, pipeline_status); - CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); - nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); - elements.push_back(nms_source_queue_elem.value()); - CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); - CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); - nms_elem.value()->add_sink_name(curr_stream_info.name); - } - elements.push_back(nms_elem.value()); - - auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); - CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); - - EventPtr core_op_activated_event = nullptr; - if (!output_streams[0]->is_scheduled()) { - core_op_activated_event = output_streams[0]->get_core_op_activated_event(); - } - - // If user uses HailoRT++ we can assume he won't use Output Scale by Feature - auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements), - std::move(pipeline_status), shutdown_event, core_op_activated_event, pipeline_latency_accumulator.release()); - CHECK_EXPECTED_AS_STATUS(vstream); - vstreams.emplace_back(vstream.release()); - - return HAILO_SUCCESS; -} - -Expected VStreamsBuilderUtils::create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params) -{ - AccumulatorPtr pipeline_latency_accumulator = nullptr; - const auto measure_latency = ((vstreams_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0); - if (measure_latency) { - pipeline_latency_accumulator = make_shared_nothrow>("latency"); - CHECK_AS_EXPECTED(nullptr != pipeline_latency_accumulator, HAILO_OUT_OF_HOST_MEMORY); - } - - return pipeline_latency_accumulator; -} - } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp new file mode 
100644
index 00000000..522726af
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp
@@ -0,0 +1,1291 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file vstream_builder.cpp
+ * @brief Vstream builder impl
+ **/
+
+#include "vstream_builder.hpp"
+#include "hailo/vstream.hpp"
+#include "net_flow/ops/nms_post_process.hpp"
+#include "net_flow/ops/ssd_post_process.hpp"
+#include "net_flow/ops/yolox_post_process.hpp"
+#include "net_flow/ops/yolov8_post_process.hpp"
+#include "net_flow/ops/yolov5_post_process.hpp"
+#include "net_flow/ops/yolov5_bbox_only_post_process.hpp"
+#include "net_flow/ops/argmax_post_process.hpp"
+#include "net_flow/ops/softmax_post_process.hpp"
+#include "net_flow/ops/yolov5_seg_post_process.hpp"
+#include "common/runtime_statistics_internal.hpp"
+
+namespace hailort
+{
+Expected<std::vector<InputVStream>> VStreamsBuilderUtils::create_inputs(
+    std::vector<std::shared_ptr<InputStreamBase>> input_streams, const hailo_vstream_info_t &vstream_info,
+    const hailo_vstream_params_t &vstream_params)
+{
+    CHECK_AS_EXPECTED(!input_streams.empty(), HAILO_INVALID_ARGUMENT, "input streams can't be empty");
+    // If input_streams has more than one element, it will be handled by handle_pix_buffer_splitter_flow.
+    // For all other purposes, assuming there is only one stream is valid.
+    std::shared_ptr<InputStreamBase> input_stream = input_streams.front();
+
+    // TODO (HRT-4522): Support this measurement
+    CHECK_AS_EXPECTED(!(vstream_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+        "Pipeline FPS statistics measurement is not implemented");
+
+    std::vector<std::shared_ptr<PipelineElement>> elements;
+    std::vector<InputVStream> vstreams;
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!input_stream->is_scheduled()) {
+        core_op_activated_event = input_stream->get_core_op_activated_event();
+    }
+
+    auto pipeline_status = make_shared_nothrow<std::atomic<hailo_status>>(HAILO_SUCCESS);
+    CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    auto user_timeout = std::chrono::milliseconds(vstream_params.timeout_ms);
+
+    if (input_streams.size() > 1) {
+        CHECK_SUCCESS_AS_EXPECTED(handle_pix_buffer_splitter_flow(input_streams, vstream_info,
+            std::move(elements), vstreams, vstream_params, pipeline_status, core_op_activated_event,
+            pipeline_latency_accumulator.value()));
+    } else {
+        auto hw_write_elem = HwWriteElement::create(input_stream,
+            PipelineObject::create_element_name("HwWriteEl", input_stream->name(), input_stream->get_info().index),
+            vstream_params.pipeline_elements_stats_flags, pipeline_status);
+        CHECK_EXPECTED(hw_write_elem);
+        elements.insert(elements.begin(), hw_write_elem.value());
+
+        auto should_transform = InputTransformContext::is_transformation_required(input_stream->get_info().shape,
+            vstream_params.user_buffer_format, input_stream->get_info().hw_shape, input_stream->get_info().format,
+            input_stream->get_quant_infos());
+        CHECK_EXPECTED(should_transform);
+
+        if (should_transform.value()) {
+            auto queue_elem = PushQueueElement::create(
+                PipelineObject::create_element_name("PushQEl", input_stream->get_info().name, input_stream->get_info().index),
+                vstream_params, input_stream->get_info().hw_frame_size, pipeline_status);
+            CHECK_EXPECTED(queue_elem);
+            elements.insert(elements.begin(), queue_elem.value());
+            CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(queue_elem.value(), 
hw_write_elem.value())); + + auto pre_infer_elem = PreInferElement::create(input_stream->get_info().shape, vstream_params.user_buffer_format, + input_stream->get_info().hw_shape, input_stream->get_info().format, input_stream->get_quant_infos(), + PipelineObject::create_element_name("PreInferEl", input_stream->get_info().name, input_stream->get_info().index), + vstream_params, pipeline_status); + CHECK_EXPECTED(pre_infer_elem); + elements.insert(elements.begin(), pre_infer_elem.value()); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value())); + + input_stream->set_timeout(user_timeout); + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, pre_infer_elem.release(), + hw_write_elem.release(), std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } else { + input_stream->set_timeout(user_timeout); + auto vstream = InputVStream::create(vstream_info, input_stream->get_quant_infos(), vstream_params, hw_write_elem.value(), hw_write_elem.value(), + std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } + } + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +static hailo_vstream_params_t expand_vstream_params_autos(const hailo_stream_info_t &stream_info, + const hailo_vstream_params_t &vstream_params) +{ + auto local_vstream_params = vstream_params; + local_vstream_params.user_buffer_format = HailoRTDefaults::expand_auto_format(vstream_params.user_buffer_format, + stream_info.format); + return local_vstream_params; +} + +Expected> VStreamsBuilderUtils::create_outputs(std::shared_ptr output_stream, + NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos) +{ + std::vector> elements; + std::vector vstreams; + + if (0 != (HAILO_FORMAT_FLAGS_HOST_ARGMAX & output_stream->get_info().format.flags)) + { + LOGGER__WARNING("Using legacy implementation of Argmax in host. Please re-compile your model with latest DFC version"); + } + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. 
+ hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = hw_read_element_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.shutdown_event = nullptr; + build_params.vstream_stats_flags = hw_read_stream_stats_flags; + build_params.buffer_pool_size_edges = buffer_pool_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + if (output_stream->get_info().is_mux) { + hailo_status status = add_demux(output_stream, vstreams_params_map, std::move(elements), vstreams, hw_read_element.value(), + pipeline_status, output_vstream_infos); + CHECK_SUCCESS_AS_EXPECTED(status); + } else { + auto vstream_info = output_vstream_infos.find(output_stream->name()); + CHECK_AS_EXPECTED(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", output_stream->name()); + assert(1 == vstreams_params_map.size()); + auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), vstreams_params_map.begin()->second); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + auto should_transform = OutputTransformContext::is_transformation_required(output_stream->get_info().hw_shape, + output_stream->get_info().format, output_stream->get_info().shape, + vstream_params.user_buffer_format, output_stream->get_quant_infos()); + CHECK_EXPECTED(should_transform); + + if (should_transform.value()) { + auto pull_queue = PullQueueElement::create( + PipelineObject::create_element_name("PullQEl_hw_read", output_stream->name(), output_stream->get_info().index), + build_params.timeout, buffer_pool_size, output_stream->get_frame_size(), + hw_read_element_stats_flags, hw_read_stream_stats_flags, pipeline_status); + CHECK_EXPECTED(pull_queue); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), pull_queue.value())); + elements.push_back(pull_queue.value()); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements, + "PostInferEl", vstream_params); + CHECK_EXPECTED(post_infer_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pull_queue.value(), post_infer_element.value())); + + auto post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second, vstream_params.user_buffer_format); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), user_buffer_queue_element.value())); + + 
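The aggregation loop near the top of create_outputs folds the per-vstream flags into one value with bitwise-or, because the single shared hw-read element must honor every consumer's statistics request, while queue sizes are summed since each consumer contributes its own buffering demand. The same logic in a compilable miniature, with hypothetical flag values:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>

// Hypothetical, simplified vstream params: only the fields the aggregation reads.
struct VStreamParams {
    uint32_t pipeline_elements_stats_flags;
    uint32_t vstream_stats_flags;
    size_t queue_size;
};

int main()
{
    std::map<std::string, VStreamParams> params_map = {
        {"out0", {0x1 /* latency-like flag */, 0x0, 2}},
        {"out1", {0x2 /* queue-size-like flag */, 0x4, 3}},
    };

    uint32_t elem_flags = 0, stream_flags = 0;
    size_t buffer_pool_size = 0;
    for (const auto &name_params : params_map) {
        elem_flags |= name_params.second.pipeline_elements_stats_flags; // union of requests
        stream_flags |= name_params.second.vstream_stats_flags;
        buffer_pool_size += name_params.second.queue_size;              // demands add up
    }
    std::cout << std::hex << elem_flags << " " << stream_flags << std::dec
              << " " << buffer_pool_size << "\n"; // prints "3 4 5"
    return 0;
}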
output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + pull_queue->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } else { + auto post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second, vstream_params.user_buffer_format); + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), user_buffer_queue_element.value())); + + output_stream->set_timeout(std::chrono::milliseconds(vstream_params.timeout_ms)); + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + } + } + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_iou(std::shared_ptr output_stream, + hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + auto nms_metadata = std::dynamic_pointer_cast(iou_op_metadata); + assert(nullptr != nms_metadata); + + vstream_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, + iou_op_metadata->type(), nms_metadata->nms_config().bbox_only); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED(pipeline_latency_accumulator); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = vstream_params.pipeline_elements_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.shutdown_event = nullptr; + build_params.vstream_stats_flags = vstream_params.vstream_stats_flags; + build_params.buffer_pool_size_edges = vstream_params.queue_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read", + vstream_params, output_stream->get_frame_size()); + CHECK_EXPECTED(hw_read_queue_element); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, 
elements,
+        "PostInferEl", vstream_params);
+    CHECK_EXPECTED(post_infer_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+
+    auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream->get_info().nms_info, vstream_params.user_buffer_format);
+    auto pre_nms_convert_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_nms_convert",
+        vstream_params, post_transform_frame_size);
+    CHECK_EXPECTED(pre_nms_convert_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_nms_convert_queue_element.value()));
+
+    auto nms_to_detections_element = add_nms_to_detections_convert_element(output_stream, elements, "NmsFormatToDetectionsEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(nms_to_detections_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_nms_convert_queue_element.value(), nms_to_detections_element.value()));
+
+    auto pre_remove_overlapping_bboxes_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_bboxes_removing",
+        vstream_params, 0);
+    CHECK_EXPECTED(pre_remove_overlapping_bboxes_element_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(nms_to_detections_element.value(), pre_remove_overlapping_bboxes_element_queue_element.value()));
+
+    auto remove_overlapping_bboxes_element = add_remove_overlapping_bboxes_element(output_stream, elements, "RemoveOverlappingBboxesEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(remove_overlapping_bboxes_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_remove_overlapping_bboxes_element_queue_element.value(), remove_overlapping_bboxes_element.value()));
+
+    auto pre_fill_nms_format_element_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQElt_pre_fill_nms_format",
+        vstream_params, 0);
+    CHECK_EXPECTED(pre_fill_nms_format_element_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(remove_overlapping_bboxes_element.value(), pre_fill_nms_format_element_queue_element.value()));
+
+    auto fill_nms_format_element = add_fill_nms_format_element(output_stream, elements, "FillNmsFormatEl",
+        iou_op_metadata, build_params);
+    CHECK_EXPECTED(fill_nms_format_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_fill_nms_format_element_queue_element.value(), fill_nms_format_element.value()));
+
+    auto output_vstream_info = iou_op_metadata->get_output_vstream_info();
+    CHECK_EXPECTED(output_vstream_info);
+    const auto final_frame_size = HailoRTCommon::get_frame_size(*output_vstream_info, vstream_params.user_buffer_format);
+
+    auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+        "UserBuffQEl", vstream_params, final_frame_size);
+    CHECK_EXPECTED(user_buffer_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(fill_nms_format_element.value(), user_buffer_queue_element.value()));
+    output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+
+    auto vstream = OutputVStream::create(output_vstream_info.value(), output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(),
+        std::move(elements), std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    for (const auto &curr_vstream : vstreams) {
+        LOGGER__INFO("{}", curr_vstream.get_pipeline_description());
+    }
+
+    return vstreams;
+}
+
+Expected> VStreamsBuilderUtils::create_output_post_process_softmax(std::shared_ptr output_stream,
+    const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+    const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata)
+{
+    std::vector> elements;
+    std::vector vstreams;
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!output_stream->is_scheduled()) {
+        core_op_activated_event = output_stream->get_core_op_activated_event();
+    }
+
+    auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS);
+    CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY);
+
+    assert(!vstreams_params_map.empty());
+
+    // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the
+    // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags.
+    hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE;
+    hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE;
+    size_t buffer_pool_size = 0;
+    for (const auto &elem_name_params : vstreams_params_map) {
+        hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags;
+        hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags;
+        buffer_pool_size += elem_name_params.second.queue_size;
+    }
+
+    // TODO (HRT-4522): Support this measurement
+    CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED,
+        "Pipeline FPS statistics measurement is not implemented");
+
+    assert(1 == vstreams_params_map.size());
+    auto op_input_format = softmax_op_metadata->inputs_metadata().begin()->second.format;
+    auto vstream_params = vstreams_params_map.begin()->second;
+    vstream_params.user_buffer_format = net_flow::SoftmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format);
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    ElementBuildParams build_params{};
+    build_params.elem_stats_flags = hw_read_element_stats_flags;
+    build_params.pipeline_status = pipeline_status;
+    build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE);
+    build_params.vstream_stats_flags = hw_read_stream_stats_flags;
+    build_params.shutdown_event = nullptr;
+    build_params.buffer_pool_size_edges = buffer_pool_size;
+
+    auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params);
+    CHECK_EXPECTED(hw_read_element);
+
+    auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read",
+        vstream_params, output_stream->get_frame_size());
+    CHECK_EXPECTED(hw_read_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value()));
+
+    auto post_infer_element = add_post_infer_element(output_stream, pipeline_status, elements,
+        "PostInferEl", vstream_params);
+    CHECK_EXPECTED(post_infer_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value()));
+
+    auto post_transform_frame_size = HailoRTCommon::get_frame_size(output_vstream_info, vstream_params.user_buffer_format);
+
+    auto pre_softmax_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_softmax",
+        vstream_params, post_transform_frame_size);
+    CHECK_EXPECTED(pre_softmax_queue_element);
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_softmax_queue_element.value()));
+
+    auto softmax_element = add_softmax_element(output_stream,
elements, "SoftmaxPPEl", vstream_params, softmax_op_metadata, build_params); + CHECK_EXPECTED(softmax_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_softmax_queue_element.value(), softmax_element.value())); + + auto user_buffer_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements, + "UserBuffQEl", vstream_params, post_transform_frame_size); + CHECK_EXPECTED(user_buffer_queue_element); + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(softmax_element.value(), user_buffer_queue_element.value())); + + output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE)); + + auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, user_buffer_queue_element.release(), std::move(elements), + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED(vstream); + vstreams.emplace_back(vstream.release()); + + for (const auto &curr_vstream : vstreams) { + LOGGER__INFO("{}", curr_vstream.get_pipeline_description()); + } + + return vstreams; +} + +InputVStream VStreamsBuilderUtils::create_input(std::shared_ptr input_vstream) +{ + return InputVStream(std::move(input_vstream)); +} + +OutputVStream VStreamsBuilderUtils::create_output(std::shared_ptr output_vstream) +{ + return OutputVStream(std::move(output_vstream)); +} + +static bool are_formats_equal(const hailo_format_t &format1, const hailo_format_t &format2) { + return ((format1.order == format2.order) && (format1.flags == format2.flags) && (format1.type == format2.type)); +} + +Expected> VStreamsBuilderUtils::create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, + OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, + const std::unordered_map &post_process_ops_metadata, + const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map) +{ + auto first_stream_info = output_streams[0]->get_info(); + if ((HailoRTCommon::is_nms(first_stream_info)) && (first_stream_info.nms_info.is_defused)) { + // Case defuse NMS + return create_output_nms(output_streams, vstream_params, output_vstream_infos_map); + } else if (contains(op_inputs_to_op_name, static_cast(first_stream_info.name))) { + // Case post-process on host + auto &op_name = op_inputs_to_op_name.at(first_stream_info.name); + auto &op_metadata = post_process_ops_metadata.at(op_name); + switch (op_metadata->type()) { + case net_flow::OperationType::YOLOX: + case net_flow::OperationType::YOLOV8: + case net_flow::OperationType::SSD: + case net_flow::OperationType::YOLOV5: + case net_flow::OperationType::YOLOV5SEG: + case net_flow::OperationType::IOU: + { + assert(1 <= op_metadata->outputs_metadata().size()); + auto updated_outputs_metadata = op_metadata->outputs_metadata(); + auto nms_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != nms_metadata); + updated_outputs_metadata.begin()->second.format = + net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type(vstream_params.user_buffer_format, op_metadata->type(), + nms_metadata->nms_config().bbox_only); + + op_metadata->set_outputs_metadata(updated_outputs_metadata); + CHECK_SUCCESS_AS_EXPECTED(op_metadata->validate_format_info()); + std::shared_ptr op; + switch (op_metadata->type()) { + case (net_flow::OperationType::YOLOX): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + 
assert(nullptr != metadata); + auto op_expected = net_flow::YOLOXPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::YOLOV8): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::YOLOV8PostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::YOLOV5): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + if (metadata->nms_config().bbox_only) { + auto bbox_only_metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != bbox_only_metadata); + auto op_expected = net_flow::YOLOv5BboxOnlyPostProcessOp::create(bbox_only_metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } else { + auto op_expected = net_flow::YOLOv5PostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + } + case (net_flow::OperationType::YOLOV5SEG): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::Yolov5SegPostProcess::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::SSD): + { + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + auto op_expected = net_flow::SSDPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + op = op_expected.release(); + break; + } + case (net_flow::OperationType::IOU): + { + return create_output_post_process_iou(output_streams[0], vstream_params, op_metadata); + } + default: + break; + } + + return create_output_post_process_nms(output_streams, vstream_params, output_vstream_infos_map, op); + } + + case net_flow::OperationType::ARGMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = op_metadata->get_output_vstream_info(); + CHECK_EXPECTED(output_vstream_info); + return create_output_post_process_argmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); + } + + case net_flow::OperationType::SOFTMAX: + { + assert(output_streams.size() == 1); + NameToVStreamParamsMap name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + auto output_vstream_info = op_metadata->get_output_vstream_info(); + CHECK_EXPECTED(output_vstream_info); + return create_output_post_process_softmax(output_streams[0], name_to_vstream_params_map, output_vstream_info.release(), op_metadata); + } + + default: + LOGGER__ERROR("op type {} of op {} is not in any of the supported post process OP types", net_flow::OpMetadata::get_operation_type_str(op_metadata->type()), op_name); + return make_unexpected(HAILO_INVALID_OPERATION); + } + } else { + // All other cases + assert(output_streams.size() == 1); + NameToVStreamParamsMap 
name_to_vstream_params_map; + for (auto &output_stream : all_output_streams) { + if (strncmp(output_stream.first->get_info().name, output_streams[0]->get_info().name, HAILO_MAX_STREAM_NAME_SIZE) == 0) { + for (auto &vstream : output_stream.second) { + name_to_vstream_params_map.insert(vstream); + } + } + } + return create_outputs(output_streams[0], name_to_vstream_params_map, output_vstream_infos_map); + } +} + +Expected> VStreamsBuilderUtils::create_output_nms(OutputStreamPtrVector &output_streams, + hailo_vstream_params_t vstreams_params, + const std::map &output_vstream_infos) +{ + for (const auto &out_stream : output_streams) { + CHECK_AS_EXPECTED(are_formats_equal(output_streams[0]->get_info().format, out_stream->get_info().format), + HAILO_INVALID_ARGUMENT, "All nms streams of the same virtual output must have the same format"); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + std::vector> elements; + std::vector vstreams; + + hailo_status status = add_nms_fuse(output_streams, vstreams_params, elements, vstreams, + pipeline_status, output_vstream_infos); + CHECK_SUCCESS_AS_EXPECTED(status); + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_nms(OutputStreamPtrVector &output_streams, + hailo_vstream_params_t vstreams_params, + const std::map &output_vstream_infos, + const std::shared_ptr &nms_op) +{ + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + std::vector> elements; + std::vector vstreams; + + hailo_status status = add_nms_post_process(output_streams, vstreams_params, elements, vstreams, + pipeline_status, output_vstream_infos, nms_op); + CHECK_SUCCESS_AS_EXPECTED(status); + + for (const auto &vstream : vstreams) { + LOGGER__INFO("{}", vstream.get_pipeline_description()); + } + + return vstreams; +} + +Expected> VStreamsBuilderUtils::add_hw_read_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const ElementBuildParams &build_params) +{ + auto hw_read_elem = HwReadElement::create(output_stream, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(hw_read_elem); + elements.push_back(hw_read_elem.value()); + return hw_read_elem; +} + +Expected> VStreamsBuilderUtils::add_pull_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size) +{ + auto pull_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, frame_size, pipeline_status); + CHECK_EXPECTED(pull_queue_elem); + elements.push_back(pull_queue_elem.value()); + return pull_queue_elem; +} + +Expected> VStreamsBuilderUtils::add_argmax_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params, + const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const ElementBuildParams &build_params) +{ + // Updating metadata according to user request. 
TODO: HRT-9737
+    auto updated_outputs_metadata = argmax_op_metadata.get()->outputs_metadata();
+    updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto metadata = std::dynamic_pointer_cast(argmax_op_metadata);
+    assert(nullptr != metadata);
+    metadata->set_outputs_metadata(updated_outputs_metadata);
+    CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info());
+    // Updating metadata according to user request. TODO: HRT-9737 - End
+
+    auto op_expected = net_flow::ArgmaxPostProcessOp::create(metadata);
+    CHECK_EXPECTED(op_expected);
+    auto argmax_op = op_expected.release();
+
+    auto argmax_element = ArgmaxPostProcessElement::create(argmax_op,
+        PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index),
+        build_params);
+    CHECK_EXPECTED(argmax_element);
+    elements.push_back(argmax_element.value());
+    return argmax_element;
+}
+
+Expected> VStreamsBuilderUtils::add_softmax_element(std::shared_ptr &output_stream,
+    std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+    const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const ElementBuildParams &build_params)
+{
+    // Updating metadata according to user request. TODO: HRT-9737
+    // Currently softmax only supports float32 input with NHWC or NC order
+    auto updated_inputs_metadata = softmax_op_metadata.get()->inputs_metadata();
+    updated_inputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto updated_outputs_metadata = softmax_op_metadata.get()->outputs_metadata();
+    updated_outputs_metadata.begin()->second.format = vstream_params.user_buffer_format;
+    auto metadata = std::dynamic_pointer_cast(softmax_op_metadata);
+    assert(nullptr != metadata);
+    metadata->set_outputs_metadata(updated_outputs_metadata);
+    metadata->set_inputs_metadata(updated_inputs_metadata);
+    CHECK_SUCCESS_AS_EXPECTED(metadata->validate_format_info());
+    // Updating metadata according to user request.
TODO: HRT-9737 - End + + auto op_expected = net_flow::SoftmaxPostProcessOp::create(metadata); + CHECK_EXPECTED(op_expected); + auto softmax_op = op_expected.release(); + auto softmax_element = SoftmaxPostProcessElement::create(softmax_op, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params.elem_stats_flags, build_params.pipeline_status, build_params.timeout); + CHECK_EXPECTED(softmax_element); + elements.push_back(softmax_element.value()); + return softmax_element; +} + +Expected> VStreamsBuilderUtils::add_nms_to_detections_convert_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto nms_to_detections_element = ConvertNmsToDetectionsElement::create(metadata->nms_info(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(nms_to_detections_element); + elements.push_back(nms_to_detections_element.value()); + return nms_to_detections_element; +} + +Expected> VStreamsBuilderUtils::add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto remove_overlapping_bboxes_element = RemoveOverlappingBboxesElement::create(metadata->nms_config(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(remove_overlapping_bboxes_element); + elements.push_back(remove_overlapping_bboxes_element.value()); + return remove_overlapping_bboxes_element; +} + +Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, + std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, + const ElementBuildParams &build_params) +{ + auto metadata = std::dynamic_pointer_cast(op_metadata); + assert(nullptr != metadata); + + auto fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_config(), + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + build_params); + CHECK_EXPECTED(fill_nms_format_element); + elements.push_back(fill_nms_format_element.value()); + return fill_nms_format_element; +} + +Expected> VStreamsBuilderUtils::add_user_buffer_queue_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size) +{ + auto post_argmax_queue_element = UserBufferQueueElement::create( + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, frame_size, pipeline_status); + CHECK_EXPECTED(post_argmax_queue_element); + elements.push_back(post_argmax_queue_element.value()); + return post_argmax_queue_element; +} + +Expected> VStreamsBuilderUtils::add_post_infer_element(std::shared_ptr &output_stream, + std::shared_ptr> &pipeline_status, std::vector> &elements, + const std::string &element_name, const hailo_vstream_params_t 
&vstream_params) +{ + auto post_infer_element = PostInferElement::create(output_stream->get_info().hw_shape, output_stream->get_info().format, + output_stream->get_info().shape, vstream_params.user_buffer_format, output_stream->get_quant_infos(), output_stream->get_info().nms_info, + PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), + vstream_params, pipeline_status); + CHECK_EXPECTED(post_infer_element); + elements.push_back(post_infer_element.value()); + return post_infer_element; +} + +Expected> VStreamsBuilderUtils::create_output_post_process_argmax(std::shared_ptr output_stream, + const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, + const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata) +{ + std::vector> elements; + std::vector vstreams; + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + auto pipeline_status = make_shared_nothrow>(HAILO_SUCCESS); + CHECK_AS_EXPECTED(nullptr != pipeline_status, HAILO_OUT_OF_HOST_MEMORY); + + assert(!vstreams_params_map.empty()); + + // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the + // pipeline_elements_stats_flags for the hw_read_element as bitwise or of all the flags. + hailo_pipeline_elem_stats_flags_t hw_read_element_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t hw_read_stream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + hw_read_element_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + hw_read_stream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + // TODO (HRT-4522): Support this measurement + CHECK_AS_EXPECTED(!(hw_read_stream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_FPS), HAILO_NOT_IMPLEMENTED, + "Pipeline FPS statistics measurement is not implemented"); + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = hw_read_element_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.vstream_stats_flags = hw_read_stream_stats_flags; + build_params.shutdown_event = nullptr; + build_params.buffer_pool_size_edges = buffer_pool_size; + + auto hw_read_element = add_hw_read_element(output_stream, elements, "HwReadEl", build_params); + CHECK_EXPECTED(hw_read_element); + + assert(1 == vstreams_params_map.size()); + auto op_input_format = argmax_op_metadata->inputs_metadata().begin()->second.format; + auto vstream_params = vstreams_params_map.begin()->second; + vstream_params.user_buffer_format = net_flow::ArgmaxOpMetadata::expand_output_format_autos(vstream_params.user_buffer_format, op_input_format); + + auto hw_read_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_hw_read", + vstream_params, output_stream->get_frame_size()); + CHECK_EXPECTED(hw_read_queue_element); + + CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_element.value(), hw_read_queue_element.value())); + + auto argmax_element = add_argmax_element(output_stream, elements, "ArgmaxPPEl", + vstream_params, argmax_op_metadata, build_params); + CHECK_EXPECTED(argmax_element); + + 
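+    // The argmax chain, once the linking below completes, is:
+    //   HwReadEl -> PullQEl_hw_read -> ArgmaxPPEl -> UserBuffQEl_post_argmax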
CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), argmax_element.value()));
+
+    const auto final_frame_size = HailoRTCommon::get_frame_size(output_vstream_info,
+        vstream_params.user_buffer_format);
+
+    auto post_argmax_queue_element = add_user_buffer_queue_element(output_stream, pipeline_status, elements,
+        "UserBuffQEl_post_argmax", vstream_params, final_frame_size);
+    CHECK_EXPECTED(post_argmax_queue_element);
+
+    CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(argmax_element.value(), post_argmax_queue_element.value()));
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params);
+    CHECK_EXPECTED(pipeline_latency_accumulator);
+
+    output_stream->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+    hw_read_queue_element->get()->set_timeout(std::chrono::milliseconds(HAILO_INFINITE));
+    auto vstream = OutputVStream::create(output_vstream_info, output_stream->get_quant_infos(), vstream_params, post_argmax_queue_element.release(), std::move(elements),
+        std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    for (const auto &current_vstream : vstreams) {
+        LOGGER__INFO("{}", current_vstream.get_pipeline_description());
+    }
+
+    return vstreams;
+}
+
+hailo_status VStreamsBuilderUtils::handle_pix_buffer_splitter_flow(std::vector> streams,
+    const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements,
+    std::vector &vstreams, const hailo_vstream_params_t &vstream_params,
+    std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event,
+    AccumulatorPtr accumulator)
+{
+    // Sort the streams by their plane index - we rely on this order to know which plane belongs to which stream
+    auto comparator = [](std::shared_ptr a, std::shared_ptr b) {
+        return a->get_layer_info().plane_index < b->get_layer_info().plane_index;
+    };
+    std::sort(streams.begin(), streams.end(), comparator);
+
+    auto duration_collector_expected = DurationCollector::create(vstream_params.pipeline_elements_stats_flags);
+    CHECK_EXPECTED_AS_STATUS(duration_collector_expected);
+
+    auto planes_splitter = PixBufferElement::create(PipelineObject::create_element_name("PixBufferEl",
+        vstream_info.name, 0), std::chrono::milliseconds(HAILO_INFINITE), duration_collector_expected.release(),
+        pipeline_status, vstream_info.format.order);
+    CHECK_EXPECTED_AS_STATUS(planes_splitter);
+    base_elements.push_back(planes_splitter.value());
+
+    uint32_t stream_number = 0;
+
+    for (const auto &stream : streams) {
+        auto hw_write_elem = HwWriteElement::create(stream,
+            PipelineObject::create_element_name("HwWriteEl", stream->name(), stream->get_info().index),
+            vstream_params.pipeline_elements_stats_flags, pipeline_status);
+        CHECK_EXPECTED_AS_STATUS(hw_write_elem);
+        base_elements.insert(base_elements.begin(), hw_write_elem.value());
+
+        auto &stream_info = stream->get_info();
+        auto &src_image_shape = stream_info.shape;
+        auto &dst_image_shape = stream_info.hw_shape;
+        auto &dst_format = stream_info.format;
+        auto src_format = vstream_params.user_buffer_format;
+        /* The format order of each plane (stream) is determined by the stream's order;
+           type and flags are taken from the vstream params */
+        src_format.order = dst_format.order;
+        auto quant_infos = std::vector{stream_info.quant_info};
+
+        auto should_transform_expected = InputTransformContext::is_transformation_required(src_image_shape, src_format,
+            dst_image_shape, dst_format, quant_infos);
+        CHECK_EXPECTED_AS_STATUS(should_transform_expected);
+
+        if (should_transform_expected.value()) {
+            auto pre_infer_elem = PreInferElement::create(src_image_shape, src_format,
+                dst_image_shape, dst_format, quant_infos, PipelineObject::create_element_name("PreInferEl",
+                stream->get_info().name, stream->get_info().index), vstream_params, pipeline_status);
+            CHECK_EXPECTED_AS_STATUS(pre_infer_elem);
+            base_elements.push_back(pre_infer_elem.value());
+
+            auto queue_elem = PushQueueElement::create(
+                PipelineObject::create_element_name("PushQEl", stream_info.name, stream_info.index),
+                vstream_params, stream_info.hw_frame_size, pipeline_status);
+            CHECK_EXPECTED_AS_STATUS(queue_elem);
+            base_elements.push_back(queue_elem.value());
+
+            CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), pre_infer_elem.value(), stream_number, 0));
+            CHECK_SUCCESS(PipelinePad::link_pads(pre_infer_elem.value(), queue_elem.value()));
+            CHECK_SUCCESS(PipelinePad::link_pads(queue_elem.value(), *hw_write_elem));
+        } else {
+            CHECK_SUCCESS(PipelinePad::link_pads(planes_splitter.value(), *hw_write_elem, stream_number, 0));
+        }
+        stream_number++;
+    }
+
+    auto vstream = InputVStream::create(vstream_info, { vstream_info.quant_info }, vstream_params, planes_splitter.value(),
+        nullptr, std::move(base_elements), std::move(pipeline_status), core_op_activated_event, accumulator);
+    CHECK_EXPECTED_AS_STATUS(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status VStreamsBuilderUtils::add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map,
+    std::vector> &&base_elements, std::vector &vstreams,
+    std::shared_ptr last_elem, std::shared_ptr> pipeline_status,
+    const std::map &output_vstream_infos)
+{
+    auto expected_demuxer = OutputDemuxer::create(*output_stream);
+    CHECK_EXPECTED_AS_STATUS(expected_demuxer);
+
+    std::shared_ptr demuxer_ptr = expected_demuxer.release();
+    CHECK(nullptr != demuxer_ptr, HAILO_OUT_OF_HOST_MEMORY);
+
+    auto status = output_stream->set_timeout(HAILO_INFINITE_TIMEOUT);
+    CHECK_SUCCESS(status);
+
+    // Note: In case of multiple values in vstreams_params_map (e.g. in the case of demux), we'll set the
+    // pipeline_elements_stats_flags for the demux_elem as bitwise or of all the flags.
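+    // For example, if one edge's params request FPS measurement and another's request
+    // latency measurement, the merged flags enable both, and buffer_pool_size is the
+    // sum of all the requested queue sizes.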
+ hailo_pipeline_elem_stats_flags_t demux_elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; + hailo_vstream_stats_flags_t demux_vstream_stats_flags = HAILO_VSTREAM_STATS_NONE; + size_t buffer_pool_size = 0; + for (const auto &elem_name_params : vstreams_params_map) { + demux_elem_stats_flags |= elem_name_params.second.pipeline_elements_stats_flags; + demux_vstream_stats_flags |= elem_name_params.second.vstream_stats_flags; + buffer_pool_size += elem_name_params.second.queue_size; + } + + auto pull_queue_elem = PullQueueElement::create("PreDemuxPullQEl", HAILO_INFINITE_TIMEOUT, + buffer_pool_size, output_stream->get_frame_size(), demux_elem_stats_flags, demux_vstream_stats_flags, + pipeline_status); + CHECK_EXPECTED_AS_STATUS(pull_queue_elem); + base_elements.push_back(pull_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(last_elem, pull_queue_elem.value())); + last_elem = pull_queue_elem.release(); + + auto demux_elem = TransformDemuxElement::create(demuxer_ptr, + PipelineObject::create_element_name("TransformDemuxEl", output_stream->name(), output_stream->get_info().index), + std::chrono::milliseconds(HAILO_INFINITE), demux_elem_stats_flags, pipeline_status); + CHECK_EXPECTED_AS_STATUS(demux_elem); + base_elements.push_back(demux_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(last_elem, demux_elem.value())); + + EventPtr core_op_activated_event = nullptr; + if (!output_stream->is_scheduled()) { + core_op_activated_event = output_stream->get_core_op_activated_event(); + } + + uint32_t i = 0; + for (auto &edge_info : demuxer_ptr->get_edges_stream_info()) { + auto name_params_pair = vstreams_params_map.find(edge_info.name); + CHECK(name_params_pair != vstreams_params_map.end(), HAILO_NOT_FOUND, + "Failed to find vstreams params of edge {}", edge_info.name); + + const auto vstream_info = output_vstream_infos.find(edge_info.name); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", edge_info.name); + + const auto vstream_params = expand_vstream_params_autos(output_stream->get_info(), name_params_pair->second); + + // For each mux vstream, we create a copy of the previous elements + auto current_vstream_elements = base_elements; + + // For muxed VStreams we use the same pipeline_status for all + auto pipeline_status_copy = pipeline_status; + + auto demux_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQueueEl_demux", edge_info.name, edge_info.index), + vstream_params, edge_info.hw_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(demux_queue_elem); + current_vstream_elements.push_back(demux_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_elem.value(), demux_queue_elem.value(), i, 0)); + + CHECK_SUCCESS(demux_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT)); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstream_params); + CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); + auto should_transform = OutputTransformContext::is_transformation_required(edge_info.hw_shape, + edge_info.format, edge_info.shape, vstream_params.user_buffer_format, std::vector{edge_info.quant_info}); // TODO: Get quant vector (HRT-11077) + CHECK_EXPECTED_AS_STATUS(should_transform); + + if (should_transform.value()) { + auto post_infer_elem = PostInferElement::create(edge_info.hw_shape, edge_info.format, + edge_info.shape, vstream_params.user_buffer_format, { edge_info.quant_info }, edge_info.nms_info, // TODO: Get quant vector (HRT-11077) + 
PipelineObject::create_element_name("PostInferEl", edge_info.name, edge_info.index), + vstream_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + current_vstream_elements.push_back(post_infer_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), post_infer_elem.value())); + + auto post_transform_frame_size = HailoRTCommon::get_frame_size(edge_info.shape, vstream_params.user_buffer_format); + auto post_infer_queue_elem = UserBufferQueueElement::create( + PipelineObject::create_element_name("UserBuffQEl_post_infer", edge_info.name, edge_info.index), + vstream_params, post_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); + current_vstream_elements.push_back(post_infer_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); + + // TODO: Replace output_stream->get_quant_infos() with mux quant info + auto vstream = OutputVStream::create(vstream_info->second, output_stream->get_quant_infos(), vstream_params, post_infer_queue_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) + std::move(pipeline_status_copy), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + } else { + // TODO: HRT-4179 + auto user_copy_elem = CopyBufferElement::create( + PipelineObject::create_element_name("CopyBufferEl", edge_info.name, edge_info.index), + pipeline_status, std::chrono::milliseconds(vstream_params.timeout_ms)); + CHECK_EXPECTED_AS_STATUS(user_copy_elem); + current_vstream_elements.push_back(user_copy_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(demux_queue_elem.value(), user_copy_elem.value())); + + // TODO: Replace output_stream->get_quant_infos() with mux quant info + auto vstream = OutputVStream::create(vstream_info->second, { edge_info.quant_info }, vstream_params, user_copy_elem.release(), std::move(current_vstream_elements), // TODO: Get quant vector (HRT-11077) + std::move(pipeline_status_copy), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + } + i++; + } + return HAILO_SUCCESS; +} + +hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, + std::vector> &elements, std::vector &vstreams, + std::shared_ptr> pipeline_status, + const std::map &output_vstream_infos) +{ + std::vector nms_infos; + nms_infos.reserve(output_streams.size()); + for (const auto &out_stream : output_streams) { + CHECK(out_stream->get_info().nms_info.defuse_info.class_group_index <= output_streams.size(), + HAILO_INVALID_ARGUMENT, "Not all defused nms outputs were grouped correctly!"); + nms_infos.emplace_back(out_stream->get_info().nms_info); + } + + // To get the fused layer name and src stream format, we use the stream info of one of the defuses + auto first_defused_stream_info = output_streams[0]->get_info(); + auto fused_layer_name = first_defused_stream_info.nms_info.defuse_info.original_name; + auto src_stream_format = first_defused_stream_info.format; + + auto vstream_info = output_vstream_infos.find(fused_layer_name); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}. Could be due to use of old HEF. 
Try to re-compile network with newer Dataflow Compiler version", fused_layer_name); + + vstreams_params = expand_vstream_params_autos(first_defused_stream_info, vstreams_params); + auto nms_elem = NmsMuxElement::create(nms_infos, + PipelineObject::create_element_name("NmsMuxEl", fused_layer_name, 0), + vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_elem); + auto fused_layer_nms_info = nms_elem.value()->get_fused_nms_info(); + + for (uint32_t i = 0; i < output_streams.size(); ++i) { + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = vstreams_params.pipeline_elements_stats_flags; + build_params.pipeline_status = pipeline_status; + build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE); + build_params.vstream_stats_flags = vstreams_params.vstream_stats_flags; + build_params.shutdown_event = nullptr; + build_params.buffer_pool_size_edges = vstreams_params.queue_size; + + const auto &curr_stream_info = output_streams[i]->get_info(); + output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT); + + auto hw_read_elem = HwReadElement::create(output_streams[i], + PipelineObject::create_element_name("HwReadEl", curr_stream_info.name, curr_stream_info.index), + build_params); + CHECK_EXPECTED_AS_STATUS(hw_read_elem); + elements.push_back(hw_read_elem.value()); + + auto nms_source_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQueueEl_nms_source", curr_stream_info.name, curr_stream_info.index), + vstreams_params, curr_stream_info.hw_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem); + elements.push_back(nms_source_queue_elem.value()); + nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); + CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value())); + CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i)); + } + elements.push_back(nms_elem.value()); + + auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params); + CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator); + + EventPtr core_op_activated_event = nullptr; + if (!output_streams[0]->is_scheduled()) { + core_op_activated_event = output_streams[0]->get_core_op_activated_event(); + } + + auto pre_transform_frame_size = HailoRTCommon::get_nms_hw_frame_size(fused_layer_nms_info); + + auto nms_queue_elem = PullQueueElement::create( + PipelineObject::create_element_name("PullQEl_post_infer", fused_layer_name, 0), + vstreams_params, pre_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_queue_elem); + nms_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT); + elements.push_back(nms_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), nms_queue_elem.value())); + + auto post_infer_elem = PostInferElement::create({}, src_stream_format, + {}, vstreams_params.user_buffer_format, { vstream_info->second.quant_info }, fused_layer_nms_info, // TODO: Get quant vector (HRT-11078) + PipelineObject::create_element_name("PostInferEl", fused_layer_name, 0), vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_elem); + + elements.push_back(post_infer_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(nms_queue_elem.value(), post_infer_elem.value())); + + auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(fused_layer_nms_info, vstreams_params.user_buffer_format); + auto post_infer_queue_elem = UserBufferQueueElement::create( + 
PipelineObject::create_element_name("UserBufQEl_post_infer", fused_layer_name, 0), + vstreams_params, post_transform_frame_size, pipeline_status); + CHECK_EXPECTED_AS_STATUS(post_infer_queue_elem); + elements.push_back(post_infer_queue_elem.value()); + CHECK_SUCCESS(PipelinePad::link_pads(post_infer_elem.value(), post_infer_queue_elem.value())); + + // TODO: Check with SDK where should we take the quant infos from (output_streams[0]->get_quant_infos() might be good) (HRT-11078) + auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, post_infer_queue_elem.release(), std::move(elements), // TODO: Get quant vector (HRT-11078) + std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release()); + CHECK_EXPECTED_AS_STATUS(vstream); + vstreams.emplace_back(vstream.release()); + + return HAILO_SUCCESS; +} + +hailo_status VStreamsBuilderUtils::add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, + std::vector> &elements, std::vector &vstreams, + std::shared_ptr> pipeline_status, const std::map &output_vstream_infos, + const std::shared_ptr &nms_op) +{ + auto first_stream_info = output_streams[0]->get_info(); + auto op_metadata = std::dynamic_pointer_cast(nms_op->metadata()); + assert(nullptr != op_metadata); + vstreams_params.user_buffer_format = net_flow::NmsOpMetadata::expand_output_format_autos_by_op_type( + vstreams_params.user_buffer_format, nms_op->metadata()->type(), op_metadata->nms_config().bbox_only); + CHECK(vstreams_params.user_buffer_format.type == HAILO_FORMAT_TYPE_FLOAT32, HAILO_INVALID_ARGUMENT, + "NMS output format type must be HAILO_FORMAT_TYPE_FLOAT32"); + + if (!op_metadata->nms_config().bbox_only) { + CHECK(HailoRTCommon::is_nms(vstreams_params.user_buffer_format.order), HAILO_INVALID_ARGUMENT, + "NMS output format order must be HAILO_FORMAT_ORDER_HAILO_NMS or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK"); + } + + std::unordered_map inputs_metadata; + std::unordered_map outputs_metadata; + for (uint32_t i = 0; i < output_streams.size(); ++i) { + const auto &curr_stream_info = output_streams[i]->get_info(); + net_flow::BufferMetaData input_metadata = { + curr_stream_info.shape, + curr_stream_info.hw_shape, + curr_stream_info.format, + curr_stream_info.quant_info + }; + inputs_metadata.insert({curr_stream_info.name, input_metadata}); + } + + const auto &output_pads = nms_op->outputs_metadata(); + assert(output_pads.size() == 1); + auto vstream_info = output_vstream_infos.find(output_pads.begin()->first); + CHECK(vstream_info != output_vstream_infos.end(), HAILO_NOT_FOUND, + "Failed to find vstream info of {}", nms_op->metadata()->get_name()); + net_flow::BufferMetaData output_metadata = { + vstream_info->second.shape, + vstream_info->second.shape, + vstream_info->second.format, + vstream_info->second.quant_info + }; + outputs_metadata.insert({vstream_info->first, output_metadata}); + + auto nms_elem = NmsPostProcessMuxElement::create(nms_op, + PipelineObject::create_element_name("NmsPPMuxEl", nms_op->get_name(), 0), + vstreams_params, pipeline_status); + CHECK_EXPECTED_AS_STATUS(nms_elem); + + hailo_format_t nms_src_format; + nms_src_format.flags = HAILO_FORMAT_FLAGS_NONE; + nms_src_format.order = HAILO_FORMAT_ORDER_NHCW; + nms_src_format.type = first_stream_info.format.type; + + for (uint32_t i = 0; i < output_streams.size(); ++i) { + + ElementBuildParams build_params{}; + build_params.elem_stats_flags = 
vstreams_params.pipeline_elements_stats_flags;
+        build_params.pipeline_status = pipeline_status;
+        build_params.timeout = std::chrono::milliseconds(HAILO_INFINITE);
+        build_params.vstream_stats_flags = vstreams_params.vstream_stats_flags;
+        build_params.shutdown_event = nullptr;
+        build_params.buffer_pool_size_edges = vstreams_params.queue_size;
+
+        const auto &curr_stream_info = output_streams[i]->get_info();
+        output_streams[i]->set_timeout(HAILO_INFINITE_TIMEOUT);
+
+        auto should_transform = OutputTransformContext::is_transformation_required(curr_stream_info.hw_shape, curr_stream_info.format,
+            curr_stream_info.hw_shape, nms_src_format, output_streams[i]->get_quant_infos());
+        CHECK_EXPECTED_AS_STATUS(should_transform);
+
+        CHECK(!(should_transform.value()), HAILO_INVALID_ARGUMENT, "Unexpected transformation required for {}", curr_stream_info.name);
+
+        auto hw_read_elem = HwReadElement::create(output_streams[i],
+            PipelineObject::create_element_name("HwReadEl", curr_stream_info.name, curr_stream_info.index),
+            build_params);
+        CHECK_EXPECTED_AS_STATUS(hw_read_elem);
+        elements.push_back(hw_read_elem.value());
+
+        auto nms_source_queue_elem = PullQueueElement::create(
+            PipelineObject::create_element_name("PullQEl_nms", curr_stream_info.name, curr_stream_info.index),
+            vstreams_params, curr_stream_info.hw_frame_size, pipeline_status);
+        CHECK_EXPECTED_AS_STATUS(nms_source_queue_elem);
+        nms_source_queue_elem.value()->set_timeout(HAILO_INFINITE_TIMEOUT);
+        elements.push_back(nms_source_queue_elem.value());
+        CHECK_SUCCESS(PipelinePad::link_pads(hw_read_elem.value(), nms_source_queue_elem.value()));
+        CHECK_SUCCESS(PipelinePad::link_pads(nms_source_queue_elem.value(), nms_elem.value(), 0, i));
+        nms_elem.value()->add_sink_name(curr_stream_info.name);
+    }
+    elements.push_back(nms_elem.value());
+
+    uint32_t post_transform_frame_size;
+    if (op_metadata->nms_config().bbox_only) {
+        post_transform_frame_size = HailoRTCommon::get_frame_size(vstream_info->second.shape, vstream_info->second.format);
+    } else {
+        post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(vstream_info->second.nms_shape, vstreams_params.user_buffer_format);
+    }
+    auto user_buffer_elem = UserBufferQueueElement::create(
+        PipelineObject::create_element_name("UserBufQEl_post_infer", vstream_info->first, 0),
+        vstreams_params, post_transform_frame_size, pipeline_status);
+    CHECK_EXPECTED_AS_STATUS(user_buffer_elem);
+    elements.push_back(user_buffer_elem.value());
+    CHECK_SUCCESS(PipelinePad::link_pads(nms_elem.value(), user_buffer_elem.value()));
+
+    auto pipeline_latency_accumulator = create_pipeline_latency_accumulator(vstreams_params);
+    CHECK_EXPECTED_AS_STATUS(pipeline_latency_accumulator);
+
+    EventPtr core_op_activated_event = nullptr;
+    if (!output_streams[0]->is_scheduled()) {
+        core_op_activated_event = output_streams[0]->get_core_op_activated_event();
+    }
+
+    // If the user uses HailoRT++, we can assume they won't use Output Scale by Feature
+    auto vstream = OutputVStream::create(vstream_info->second, output_streams[0]->get_quant_infos(), vstreams_params, nms_elem.release(), std::move(elements),
+        std::move(pipeline_status), core_op_activated_event, pipeline_latency_accumulator.release());
+    CHECK_EXPECTED_AS_STATUS(vstream);
+    vstreams.emplace_back(vstream.release());
+
+    return HAILO_SUCCESS;
+}
+
+Expected VStreamsBuilderUtils::create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params)
+{
+    AccumulatorPtr pipeline_latency_accumulator = nullptr;
+    const auto measure_latency = ((vstreams_params.vstream_stats_flags & HAILO_VSTREAM_STATS_MEASURE_LATENCY) != 0);
+    if (measure_latency) {
+        pipeline_latency_accumulator = make_shared_nothrow>("latency");
+        CHECK_AS_EXPECTED(nullptr != pipeline_latency_accumulator, HAILO_OUT_OF_HOST_MEMORY);
+    }
+
+    return pipeline_latency_accumulator;
+}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp
new file mode 100644
index 00000000..b269003a
--- /dev/null
+++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp
@@ -0,0 +1,127 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file vstream_builder.hpp
+ * @brief Vstream Builder
+ **/
+
+#ifndef _HAILO_VSTREAM_BUILDER_HPP_
+#define _HAILO_VSTREAM_BUILDER_HPP_
+
+#include "net_flow/pipeline/vstream_internal.hpp"
+
+namespace hailort
+{
+
+class VStreamsBuilderUtils
+{
+public:
+    static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos,
+        const hailo_vstream_params_t &vstreams_params);
+    static Expected> create_outputs(std::shared_ptr output_stream,
+        NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos);
+    static InputVStream create_input(std::shared_ptr input_vstream);
+    static OutputVStream create_output(std::shared_ptr output_vstream);
+    static Expected> create_output_nms(OutputStreamPtrVector &output_streams,
+        hailo_vstream_params_t vstreams_params,
+        const std::map &output_vstream_infos);
+    static Expected> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams,
+        OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params,
+        const std::unordered_map &post_process_ops,
+        const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map);
+    static Expected> create_output_post_process_nms(OutputStreamPtrVector &output_streams,
+        hailo_vstream_params_t vstreams_params,
+        const std::map &output_vstream_infos,
+        const std::shared_ptr &nms_op);
+    static Expected> add_hw_read_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const ElementBuildParams &build_params);
+
+    static Expected> add_pull_queue_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size);
+
+    // Move all post-process related elements to a dedicated module - HRT-11512
+    static Expected> add_argmax_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+        const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata, const ElementBuildParams &build_params);
+
+    static Expected> add_softmax_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, hailo_vstream_params_t &vstream_params,
+        const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata, const ElementBuildParams &build_params);
+
+    static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected>
add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream,
+        std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata,
+        const ElementBuildParams &build_params);
+
+    static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params, size_t frame_size);
+
+    static Expected> add_post_infer_element(std::shared_ptr &output_stream,
+        std::shared_ptr> &pipeline_status, std::vector> &elements,
+        const std::string &element_name, const hailo_vstream_params_t &vstream_params);
+
+    static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map,
+        std::vector> &&elements, std::vector &vstreams,
+        std::shared_ptr last_elem, std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos);
+
+    static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams,
+        const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements,
+        std::vector &vstreams, const hailo_vstream_params_t &vstream_params,
+        std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event,
+        AccumulatorPtr accumulator);
+
+    static hailo_status add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params,
+        std::vector> &elements, std::vector &vstreams,
+        std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos);
+
+    static hailo_status add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params,
+        std::vector> &elements, std::vector &vstreams,
+        std::shared_ptr> pipeline_status,
+        const std::map &output_vstream_infos,
+        const std::shared_ptr &nms_op);
+
+    static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params);
+
+    static hailo_format_t expand_user_buffer_format_autos_multi_planar(const hailo_vstream_info_t &vstream_info,
+        const hailo_format_t &user_buffer_format)
+    {
+        /* In the multi-planar case we compare to vstream_info instead of stream_info,
+           as the ll-stream formats don't indicate the format of the vstreams */
+        auto expanded_user_buffer_format = user_buffer_format;
+        if (HAILO_FORMAT_TYPE_AUTO == expanded_user_buffer_format.type) {
+            expanded_user_buffer_format.type = vstream_info.format.type;
+        }
+        if (HAILO_FORMAT_ORDER_AUTO == expanded_user_buffer_format.order) {
+            expanded_user_buffer_format.order = vstream_info.format.order;
+        }
+
+        return expanded_user_buffer_format;
+    }
+
+private:
+    static Expected> create_output_post_process_argmax(std::shared_ptr output_stream,
+        const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+        const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata);
+    static Expected> create_output_post_process_softmax(std::shared_ptr output_stream,
+        const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info,
+        const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata);
+    static Expected> create_output_post_process_iou(std::shared_ptr output_stream,
+        hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata);
+};
+
+} /*
diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp index a78932b7..779ced76 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp @@ -31,8 +31,13 @@ #include "stream_common/stream_internal.hpp" -#include "hef/hef_internal.hpp" +#include "common/barrier.hpp" + #include "net_flow/pipeline/pipeline.hpp" +#include "net_flow/pipeline/filter_elements.hpp" +#include "net_flow/pipeline/queue_elements.hpp" +#include "net_flow/pipeline/edge_elements.hpp" +#include "net_flow/pipeline/multi_io_elements.hpp" #include "net_flow/ops/yolov5_post_process.hpp" #include "network_group/network_group_internal.hpp" @@ -79,7 +84,7 @@ class BaseVStream protected: BaseVStream(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, hailo_status &output_status); BaseVStream() = default; @@ -94,7 +99,6 @@ class BaseVStream volatile bool m_is_activated; volatile bool m_is_aborted; std::shared_ptr> m_pipeline_status; - EventPtr m_shutdown_event; EventPtr m_core_op_activated_event; std::map m_fps_accumulators; std::map m_latency_accumulators; @@ -109,7 +113,7 @@ class InputVStreamInternal : public BaseVStream static Expected> create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStreamInternal(InputVStreamInternal &&other) noexcept = default; InputVStreamInternal &operator=(InputVStreamInternal &&other) noexcept = default; @@ -125,7 +129,7 @@ class InputVStreamInternal : public BaseVStream protected: InputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr &&core_op_activated_event, hailo_status &output_status); InputVStreamInternal() = default; }; @@ -137,7 +141,7 @@ class OutputVStreamInternal : public BaseVStream static Expected> create( const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStreamInternal(OutputVStreamInternal &&other) noexcept = default; OutputVStreamInternal &operator=(OutputVStreamInternal &&other) noexcept = default; @@ -150,11 +154,12 @@ class OutputVStreamInternal : public BaseVStream virtual 
hailo_status set_nms_score_threshold(float32_t threshold) = 0; virtual hailo_status set_nms_iou_threshold(float32_t threshold) = 0; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) = 0; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) = 0; protected: OutputVStreamInternal(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); OutputVStreamInternal() = default; }; @@ -165,7 +170,7 @@ class InputVStreamImpl : public InputVStreamInternal static Expected> create(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::shared_ptr pipeline_exit, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, EventPtr core_op_activated_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); InputVStreamImpl(InputVStreamImpl &&) noexcept = default; InputVStreamImpl(const InputVStreamImpl &) = delete; @@ -181,7 +186,7 @@ class InputVStreamImpl : public InputVStreamInternal private: InputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); bool m_is_multi_planar; @@ -193,7 +198,7 @@ class OutputVStreamImpl : public OutputVStreamInternal static Expected> create( const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, + std::shared_ptr> &&pipeline_status, EventPtr core_op_activated_event, AccumulatorPtr pipeline_latency_accumulator); OutputVStreamImpl(OutputVStreamImpl &&) noexcept = default; OutputVStreamImpl(const OutputVStreamImpl &) = delete; @@ -206,11 +211,12 @@ class OutputVStreamImpl : public OutputVStreamInternal virtual hailo_status set_nms_score_threshold(float32_t threshold) override; virtual hailo_status set_nms_iou_threshold(float32_t threshold) override; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) override; private: OutputVStreamImpl(const hailo_vstream_info_t &vstream_info, const std::vector &quant_infos, const hailo_vstream_params_t &vstream_params, std::shared_ptr pipeline_entry, std::vector> &&pipeline, - std::shared_ptr> &&pipeline_status, EventPtr shutdown_event, AccumulatorPtr pipeline_latency_accumulator, + std::shared_ptr> &&pipeline_status, AccumulatorPtr pipeline_latency_accumulator, EventPtr core_op_activated_event, hailo_status &output_status); }; @@ -294,6 +300,7 @@ class OutputVStreamClient : public OutputVStreamInternal 
virtual hailo_status set_nms_score_threshold(float32_t threshold) override; virtual hailo_status set_nms_iou_threshold(float32_t threshold) override; virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) override; private: OutputVStreamClient(std::unique_ptr client, const VStreamIdentifier &&identifier, hailo_format_t &&user_buffer_format, @@ -308,640 +315,6 @@ class OutputVStreamClient : public OutputVStreamInternal }; #endif // HAILO_SUPPORT_MULTI_PROCESS -class PreInferElement : public FilterElement -{ -public: - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PUSH, bool is_dma_able = false, - std::shared_ptr async_pipeline = nullptr); - PreInferElement(std::unique_ptr &&transform_context, BufferPoolPtr buffer_pool, - const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~PreInferElement() = default; - - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::unique_ptr m_transform_context; -}; - -class RemoveOverlappingBboxesElement : public FilterElement -{ -public: - static Expected> create( - const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection 
pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - RemoveOverlappingBboxesElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~RemoveOverlappingBboxesElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - - virtual hailo_status set_nms_iou_threshold(float32_t threshold) - { - m_nms_config.nms_iou_th = threshold; - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - net_flow::NmsPostProcessConfig m_nms_config; -}; - -class PostInferElement : public FilterElement -{ -public: - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, const hailo_format_t &src_format, - const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector &dst_quant_info, const hailo_nms_info_t &nms_info, - const std::string &name, const hailo_vstream_params_t &vstream_params, std::shared_ptr> pipeline_status, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_3d_image_shape_t &src_image_shape, - const hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, - const std::vector &dst_quant_infos, const hailo_nms_info_t &nms_info, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - PostInferElement(std::unique_ptr &&transform_context, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~PostInferElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::unique_ptr m_transform_context; -}; - -class ConvertNmsToDetectionsElement : public FilterElement -{ -public: 
- static Expected> create(const hailo_nms_info_t &nms_info, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create( - const hailo_nms_info_t &nms_info, const std::string &name, const ElementBuildParams &build_params, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - ConvertNmsToDetectionsElement(const hailo_nms_info_t &&nms_info, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~ConvertNmsToDetectionsElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - hailo_nms_info_t m_nms_info; -}; - -class FillNmsFormatElement : public FilterElement -{ -public: - static Expected> create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - size_t buffer_pool_size, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const hailo_nms_info_t nms_info, - const hailo_format_t &dst_format, const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, BufferPoolPtr buffer_pool, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~FillNmsFormatElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - - virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) override - { - m_nms_config.max_proposals_per_class = max_proposals_per_class; - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - net_flow::NmsPostProcessConfig m_nms_config; -}; - -class ArgmaxPostProcessElement : public FilterElement -{ -public: - static Expected> create(std::shared_ptr argmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, 
- hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr argmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - ArgmaxPostProcessElement(std::shared_ptr argmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~ArgmaxPostProcessElement() = default; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_argmax_op; -}; - -class SoftmaxPostProcessElement : public FilterElement -{ -public: - static Expected> create(std::shared_ptr softmax_op, - const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, - std::shared_ptr> pipeline_status, size_t buffer_pool_size, std::chrono::milliseconds timeout, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr softmax_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - SoftmaxPostProcessElement(std::shared_ptr softmax_op, const std::string &name, - DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, BufferPoolPtr buffer_pool, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - virtual ~SoftmaxPostProcessElement() = default; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual PipelinePad &next_pad() override; - virtual std::string description() const override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_softmax_op; -}; - -class NmsPostProcessMuxElement : public BaseMuxElement -{ -public: - static Expected> create(std::shared_ptr nms_op, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, - hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr nms_op, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = 
nullptr); - static Expected> create(std::shared_ptr nms_op, - const std::string &name, const hailo_vstream_params_t &vstream_params, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - NmsPostProcessMuxElement(std::shared_ptr nms_op, BufferPoolPtr &&pool, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline); - - virtual std::vector get_queue_size_accumulators() override; - void add_sink_name(const std::string &name) // TODO: remove this (HRT-8875) - { - m_sinks_names.push_back(name); - } - - std::shared_ptr get_op() { return m_nms_op; } - - virtual hailo_status set_nms_score_threshold(float32_t threshold) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().nms_score_th = threshold; - - return HAILO_SUCCESS; - } - - virtual hailo_status set_nms_iou_threshold(float32_t threshold) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().nms_iou_th = threshold; - - return HAILO_SUCCESS; - } - - virtual hailo_status set_nms_max_proposals_per_class(uint32_t max_proposals_per_class) - { - auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); - assert(nullptr != nms_metadata); - nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; - - return HAILO_SUCCESS; - } - -protected: - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; - -private: - std::shared_ptr m_nms_op; - std::vector m_sinks_names; // TODO: remove this (HRT-8875) -}; - -class NmsMuxElement : public BaseMuxElement -{ -public: - static Expected> create(const std::vector &nms_infos, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::vector &nms_infos, const std::string &name, - const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(const std::vector &nms_infos, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - NmsMuxElement(const std::vector &nms_infos, const hailo_nms_info_t &fused_nms_info, BufferPoolPtr &&pool, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - const hailo_nms_info_t &get_fused_nms_info() const; - - virtual std::vector get_queue_size_accumulators() override; - -protected: - virtual Expected action(std::vector &&inputs, PipelineBuffer &&optional) override; - -private: - 
std::vector m_nms_infos; - hailo_nms_info_t m_fused_nms_info; -}; - -class TransformDemuxElement : public BaseDemuxElement -{ -public: - static Expected> create(std::shared_ptr demuxer, - const std::string &name, std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PULL, bool is_last_copy_element = false, - std::shared_ptr async_pipeline = nullptr); - static Expected> create(std::shared_ptr demuxer, - const std::string &name, const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - TransformDemuxElement(std::shared_ptr demuxer, std::vector &&pools, const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual std::vector get_queue_size_accumulators() override; - -protected: - virtual Expected> action(PipelineBuffer &&input) override; - -private: - std::shared_ptr m_demuxer; -}; - -class PixBufferElement : public BaseDemuxElement -{ -public: - static Expected> create(const std::string &name, - std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline = nullptr); - - PixBufferElement(const std::string &name, std::chrono::milliseconds timeout, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, hailo_format_order_t order, - std::shared_ptr async_pipeline); - - virtual Expected can_push_buffer_upstream(const std::string &pad_name) override; - -protected: - virtual Expected> action(PipelineBuffer &&input); - hailo_format_order_t m_order; -}; - - -class HwReadElement : public SourceElement -{ -public: - static Expected> create(std::shared_ptr stream, const std::string &name, std::chrono::milliseconds timeout, - size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, PipelineDirection pipeline_direction = PipelineDirection::PULL); - HwReadElement(std::shared_ptr stream, BufferPoolPtr buffer_pool, const std::string &name, std::chrono::milliseconds timeout, - DurationCollector &&duration_collector, EventPtr shutdown_event, std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction); - virtual ~HwReadElement() = default; - - virtual std::vector get_queue_size_accumulators() override; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_flush() override; - virtual hailo_status execute_abort() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - uint32_t get_invalid_frames_count(); - 
virtual std::string description() const override; - -private: - std::shared_ptr m_stream; - BufferPoolPtr m_pool; - std::chrono::milliseconds m_timeout; - EventPtr m_shutdown_event; - WaitOrShutdown m_activation_wait_or_shutdown; -}; - -class HwWriteElement : public SinkElement -{ -public: - static Expected> create(std::shared_ptr stream, const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - PipelineDirection pipeline_direction = PipelineDirection::PUSH); - HwWriteElement(std::shared_ptr stream, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, EventPtr got_flush_event, PipelineDirection pipeline_direction); - virtual ~HwWriteElement() = default; - - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_deactivate() override; - virtual hailo_status execute_post_deactivate(bool should_clear_abort) override; - virtual hailo_status execute_clear() override; - virtual hailo_status execute_flush() override; - virtual hailo_status execute_abort() override; - virtual hailo_status execute_clear_abort() override; - virtual hailo_status execute_wait_for_finish() override; - virtual std::string description() const override; - -private: - std::shared_ptr m_stream; - EventPtr m_got_flush_event; -}; - -class LastAsyncElement : public SinkElement -{ -public: - static Expected> create(const std::string &name, - hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - static Expected> create(const std::string &name, const ElementBuildParams &build_params, - std::shared_ptr async_pipeline, PipelineDirection pipeline_direction = PipelineDirection::PUSH); - LastAsyncElement(const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~LastAsyncElement() = default; - - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - virtual std::string description() const override; - virtual hailo_status execute_activate() override; - virtual hailo_status execute_wait_for_finish() override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - virtual hailo_status execute_post_deactivate(bool /*should_clear_abort*/) override { return HAILO_SUCCESS; }; - virtual hailo_status execute_deactivate() override { return HAILO_SUCCESS; }; -}; - -// 
Note: This element does infer - it sends writes to HW and reads the outputs -class AsyncHwElement : public PipelineElementInternal -{ -public: - static Expected> create(const std::unordered_map &named_stream_infos, - std::chrono::milliseconds timeout, size_t buffer_pool_size, hailo_pipeline_elem_stats_flags_t elem_flags, - hailo_vstream_stats_flags_t vstream_flags, EventPtr shutdown_event, const std::string &name, - std::shared_ptr> pipeline_status, - std::shared_ptr net_group, PipelineDirection pipeline_direction = PipelineDirection::PUSH, - bool is_last_copy_element = false, std::shared_ptr async_pipeline = nullptr); - AsyncHwElement(const std::unordered_map &named_stream_infos, std::chrono::milliseconds timeout, - std::vector &&output_streams_pools, const std::string &name, DurationCollector &&duration_collector, - std::shared_ptr> &&pipeline_status, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline, std::shared_ptr net_group, - const size_t max_ongoing_transfers); - virtual ~AsyncHwElement() = default; - - virtual void run_push_async(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; - virtual Expected run_pull(PipelineBuffer &&optional, const PipelinePad &source) override; - - virtual hailo_status enqueue_execution_buffer(MemoryView mem_view, const TransferDoneCallbackAsyncInfer &exec_done, const std::string &source_name) override; - virtual Expected can_push_buffer_upstream(const uint32_t source_index) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const uint32_t source_index) override; - virtual Expected can_push_buffer_upstream(const std::string &source_name) override; - virtual hailo_status fill_buffer_pool(bool is_dma_able, size_t num_of_buffers, const std::string &source_name) override; - - Expected get_source_index_from_output_stream_name(const std::string &output_stream_name); - Expected get_sink_index_from_input_stream_name(const std::string &input_stream_name); - virtual Expected get_source_index_from_source_name(const std::string &source_name) override; - -protected: - virtual std::vector execution_pads() override; - virtual hailo_status execute_terminate(hailo_status error_status) override; - -private: - void handle_error_in_hw_async_elem(hailo_status error_status); - bool has_all_sinks_arrived(); - virtual hailo_status execute_dequeue_user_buffers(hailo_status error_status) override; - - std::chrono::milliseconds m_timeout; - std::vector m_pools; - std::shared_ptr m_net_group; - size_t m_max_ongoing_transfers; - - std::unordered_map m_sink_name_to_stream_name; - std::unordered_map m_source_name_to_stream_name; - std::unordered_map m_sink_has_arrived; - std::unordered_map m_input_buffers; - std::mutex m_mutex; - std::condition_variable m_cv; - std::unordered_map m_source_name_to_index; - std::unordered_map m_sink_name_to_index; -}; - -class CopyBufferElement : public FilterElement -{ -public: - static Expected> create(const std::string &name, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); - CopyBufferElement(const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); - virtual ~CopyBufferElement() = default; - virtual PipelinePad 
&next_pad() override; - -protected: - virtual Expected action(PipelineBuffer &&input, PipelineBuffer &&optional) override; -}; - -class VStreamsBuilderUtils -{ -public: - static Expected> create_inputs(std::vector> input_streams, const hailo_vstream_info_t &input_vstream_infos, - const hailo_vstream_params_t &vstreams_params); - static Expected> create_outputs(std::shared_ptr output_stream, - NameToVStreamParamsMap &vstreams_params_map, const std::map &output_vstream_infos); - static InputVStream create_input(std::shared_ptr input_vstream); - static OutputVStream create_output(std::shared_ptr output_vstream); - static Expected> create_output_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos); - static Expected> create_output_vstreams_from_streams(const OutputStreamWithParamsVector &all_output_streams, - OutputStreamPtrVector &output_streams, const hailo_vstream_params_t &vstream_params, - const std::unordered_map &post_process_ops, - const std::unordered_map &op_inputs_to_op_name, const std::map &output_vstream_infos_map); - static Expected> create_output_post_process_nms(OutputStreamPtrVector &output_streams, - hailo_vstream_params_t vstreams_params, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op); - static Expected> add_hw_read_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, size_t buffer_pool_size, - const hailo_pipeline_elem_stats_flags_t &hw_read_element_stats_flags, const hailo_vstream_stats_flags_t &hw_read_stream_stats_flags); - - static Expected> add_pull_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); - - // Move all post-processes related elements to a dedicated model - HRT-11512 - static Expected> add_argmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &argmax_op, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, - EventPtr &shutdown_event); - - static Expected> add_softmax_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &softmax_op, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, - EventPtr &shutdown_event); - - static Expected> add_nms_to_detections_convert_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, const std::string &element_name, - hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, size_t buffer_pool_size, std::chrono::milliseconds timeout, - const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_remove_overlapping_bboxes_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, - size_t buffer_pool_size, 
std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, hailo_vstream_params_t &vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, - size_t buffer_pool_size, std::chrono::milliseconds timeout, const hailo_vstream_stats_flags_t &vstream_flags, EventPtr &shutdown_event); - - static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, EventPtr &shutdown_event, const hailo_vstream_params_t &vstream_params); - - static Expected> add_post_infer_element(std::shared_ptr &output_stream, - std::shared_ptr> &pipeline_status, std::vector> &elements, - const std::string &element_name, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event); - - static hailo_status add_demux(std::shared_ptr output_stream, NameToVStreamParamsMap &vstreams_params_map, - std::vector> &&elements, std::vector &vstreams, - std::shared_ptr hw_read_elem, EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos); - - static hailo_status handle_pix_buffer_splitter_flow(std::vector> streams, - const hailo_vstream_info_t &vstream_info, std::vector> &&base_elements, - std::vector &vstreams, const hailo_vstream_params_t &vstream_params, EventPtr shutdown_event, - std::shared_ptr> pipeline_status, EventPtr &core_op_activated_event, - AccumulatorPtr accumaltor); - - static hailo_status add_nms_fuse(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos); - - static hailo_status add_nms_post_process(OutputStreamPtrVector &output_streams, hailo_vstream_params_t &vstreams_params, - std::vector> &elements, std::vector &vstreams, - EventPtr shutdown_event, std::shared_ptr> pipeline_status, - const std::map &output_vstream_infos, - const std::shared_ptr &nms_op); - - static Expected create_pipeline_latency_accumulator(const hailo_vstream_params_t &vstreams_params); - - static hailo_format_t expand_user_buffer_format_autos_multi_planar(const hailo_vstream_info_t &vstream_info, - const hailo_format_t &user_buffer_format) - { - /* In multi planar case we compare to vstream_info instead of stream_info, - as the ll-streams formats doesnt indicate the format of the vstreams */ - auto expanded_user_buffer_format = user_buffer_format; - if (HAILO_FORMAT_TYPE_AUTO == expanded_user_buffer_format.type) { - expanded_user_buffer_format.type = vstream_info.format.type; - } - if (HAILO_FORMAT_ORDER_AUTO == expanded_user_buffer_format.order) { - expanded_user_buffer_format.order = vstream_info.format.order; - } - - return expanded_user_buffer_format; - } - -private: - static Expected> create_output_post_process_argmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &argmax_op_metadata); - static Expected> create_output_post_process_softmax(std::shared_ptr output_stream, - const NameToVStreamParamsMap &vstreams_params_map, const hailo_vstream_info_t &output_vstream_info, - const net_flow::PostProcessOpMetadataPtr &softmax_op_metadata); - 
static Expected> create_output_post_process_iou(std::shared_ptr output_stream, - hailo_vstream_params_t vstream_params, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata); -}; - } /* namespace hailort */ #endif /* _HAILO_VSTREAM_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp index dfc59160..569ead8d 100644 --- a/hailort/libhailort/src/network_group/network_group.cpp +++ b/hailort/libhailort/src/network_group/network_group.cpp @@ -17,14 +17,15 @@ #include "common/os_utils.hpp" #include "network_group/network_group_internal.hpp" -#include "hef/hef_internal.hpp" #include "eth/eth_stream.hpp" #include "vdma/vdma_stream.hpp" #include "mipi/mipi_stream.hpp" #include "device_common/control.hpp" -#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/vstream_builder.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include "core_op/resource_manager/resource_manager.hpp" - +#include "utils/buffer_storage.hpp" +#include "hef/hef_internal.hpp" namespace hailort { @@ -37,7 +38,7 @@ class ActivatedNetworkGroupImpl : public ActivatedNetworkGroup { auto status = HAILO_UNINITIALIZED; std::unique_ptr ang = make_unique_nothrow(cng, status); CHECK_NOT_NULL_AS_EXPECTED(ang, HAILO_OUT_OF_HOST_MEMORY); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__ERROR("Network group activation failed because some of the low level streams are aborted. Make sure to run clear_abort before activating!"); return make_unexpected(status); } @@ -84,7 +85,7 @@ class ActivatedNetworkGroupImpl : public ActivatedNetworkGroup { m_cng(cng) { auto activate_status = m_cng.activate_impl(); - if (HAILO_STREAM_ABORTED_BY_USER == activate_status) { + if (HAILO_STREAM_ABORT == activate_status) { LOGGER__INFO("Network group activation failed because it was aborted by user"); status = activate_status; return; @@ -159,18 +160,13 @@ Expected> ConfiguredNetworkGroup::activat return activate(HailoRTDefaults::get_active_network_group_params()); } -hailo_status ConfiguredNetworkGroup::wait_for_callbacks_finish() -{ - return wait_for_callbacks_to_maintain_below_threshold(1); -} - -hailo_status ConfiguredNetworkGroup::wait_for_callbacks_to_maintain_below_threshold(const size_t threshold) +hailo_status ConfiguredNetworkGroup::wait_for_ongoing_callbacks_count_under(const size_t threshold) { std::unique_lock lock(m_infer_requests_mutex); bool done = m_cv.wait_for(lock, DEFAULT_TRANSFER_TIMEOUT, [&, threshold](){ return (m_ongoing_transfers.load() < threshold); }); - CHECK(done, HAILO_TIMEOUT, "Got timeout in `wait_for_callbacks_to_maintain_below_threshold`"); + CHECK(done, HAILO_TIMEOUT); return HAILO_SUCCESS; } @@ -303,7 +299,7 @@ Expected> ConfiguredNetworkGroupBase::get_layer_info( return res; } -Expected> ConfiguredNetworkGroupBase::get_nms_meta_data(const std::string &edge_name) +Expected ConfiguredNetworkGroupBase::get_op_meta_data(const std::string &edge_name) { auto expected_ops_metadata = get_ops_metadata(); CHECK_EXPECTED(expected_ops_metadata); @@ -319,9 +315,19 @@ Expected> ConfiguredNetworkGroupBase::g return false; }); CHECK_AS_EXPECTED(matching_metadata != ops_metadata.end(), HAILO_INVALID_ARGUMENT, - "There is no NMS post-process for '{}'", edge_name); + "There is no post-process metadata for '{}'", edge_name); + auto metadata = (*matching_metadata); + return metadata; +} + +Expected> ConfiguredNetworkGroupBase::get_nms_meta_data(const std::string 
&edge_name) +{ + auto matching_metadata = get_op_meta_data(edge_name); + CHECK_EXPECTED(matching_metadata); + auto nms_metadata = std::dynamic_pointer_cast(*matching_metadata); - CHECK_NOT_NULL_AS_EXPECTED(nms_metadata, HAILO_INVALID_ARGUMENT); + CHECK((nms_metadata != nullptr), HAILO_INVALID_ARGUMENT, + "Failed to get nms metadata for `{}`. Op's metadata is not nms metadata", edge_name); return nms_metadata; } @@ -349,6 +355,19 @@ hailo_status ConfiguredNetworkGroupBase::set_nms_max_bboxes_per_class(const std: return HAILO_SUCCESS; } +hailo_status ConfiguredNetworkGroupBase::set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + auto expected_op_metadata = get_op_meta_data(edge_name); + CHECK_EXPECTED_AS_STATUS(expected_op_metadata); + + auto nms_metadata = std::dynamic_pointer_cast(expected_op_metadata.value()); + CHECK((nms_metadata != nullptr), HAILO_INVALID_ARGUMENT, + "Failed to `set_nms_max_accumulated_mask_size` for `{}`. Op's metadata is not YOLOv5-Seg metadata", edge_name); + + nms_metadata->yolov5seg_config().max_accumulated_mask_size = max_accumulated_mask_size; + return HAILO_SUCCESS; +} + ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase( const ConfigureNetworkParams &config_params, std::vector> &&core_ops, NetworkGroupMetadata &&metadata) : @@ -512,7 +531,7 @@ hailo_status ConfiguredNetworkGroupBase::deactivate_impl() hailo_status ConfiguredNetworkGroupBase::shutdown() { - std::unique_lock lock(m_shutdown_mutex); + std::unique_lock lock(m_mutex); if (!m_is_shutdown) { m_is_shutdown = true; return get_core_op()->shutdown(); @@ -773,9 +792,6 @@ Expected ConfiguredNetworkGroupBase::get_min_buffer_pool_size() } } - // TODO (HRT-11294): In some cases, buffer_pool_size is lower then batch_size. we should remove this line. 
- buffer_pool_size = std::max(buffer_pool_size, static_cast(get_smallest_configured_batch_size(get_config_params()))); - return buffer_pool_size; } @@ -783,31 +799,14 @@ hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks const std::function &infer_request_done_cb) { InferRequest infer_request{}; - const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); for (auto &named_buffer_callback : named_buffers_callbacks) { const auto &name = named_buffer_callback.first; const auto &buffer = named_buffer_callback.second.first; const auto &callback = named_buffer_callback.second.second; - TransferRequest trans_req{}; - trans_req.callback = callback; - BufferPtr buffer_ptr = nullptr; - // TODO (HRT-12239): Avoid this section - if (reinterpret_cast(buffer.data()) % dma_able_alignment == 0) { - auto hailo_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast(buffer.data()), - buffer.size()); - CHECK_EXPECTED_AS_STATUS(hailo_buffer); - buffer_ptr = hailo_buffer.release(); - } else { - auto hailo_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast(buffer.data()), - buffer.size()); - CHECK_EXPECTED_AS_STATUS(hailo_buffer); - buffer_ptr = hailo_buffer.release(); - } - trans_req.transfer_buffers.emplace_back(buffer_ptr); - infer_request.transfers.emplace(name, trans_req); + infer_request.transfers.emplace(name, TransferRequest{buffer, callback}); } infer_request.callback = [this, infer_request_done_cb](hailo_status status){ - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); } else if (status != HAILO_SUCCESS) { @@ -818,7 +817,8 @@ hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks decrease_ongoing_callbacks(); }; - increase_ongoing_callbacks(); + increase_ongoing_callbacks(); // Increase before launch, as the callback may be invoked before we get the chance to increase the counter + std::unique_lock lock(m_mutex); auto status = get_core_op()->infer_async(std::move(infer_request)); if (status != HAILO_SUCCESS) { // If we got an error in `infer_async()`, then the callbacks will not be called.
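The refactored `infer_async` relies on a small bookkeeping pattern: an in-flight-callback counter that is increased before the request is launched (the completion callback may fire first) and decreased from the callback, plus a condition variable that `wait_for_ongoing_callbacks_count_under` uses to block until the count drops below a threshold. A minimal, self-contained sketch of that pattern, using only the standard library (`OngoingCallbackTracker` is a hypothetical stand-in for the counter and cv embedded in ConfiguredNetworkGroup, and `timeout` plays the role of `DEFAULT_TRANSFER_TIMEOUT`):

#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <mutex>

class OngoingCallbackTracker {
public:
    // Call before launching the async operation: the completion callback can
    // run on another thread before the launch call returns, so the count must
    // already be up.
    void increase()
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        m_ongoing++;
    }

    // Call from the completion callback; wakes any waiter.
    void decrease()
    {
        {
            std::unique_lock<std::mutex> lock(m_mutex);
            m_ongoing--;
        }
        m_cv.notify_all();
    }

    // Mirrors wait_for_ongoing_callbacks_count_under(): blocks until fewer
    // than `threshold` callbacks are in flight, or the timeout expires.
    bool wait_until_under(size_t threshold, std::chrono::milliseconds timeout)
    {
        std::unique_lock<std::mutex> lock(m_mutex);
        return m_cv.wait_for(lock, timeout, [&] { return m_ongoing < threshold; });
    }

private:
    size_t m_ongoing = 0;
    std::mutex m_mutex;
    std::condition_variable m_cv;
};

Increasing before the launch is exactly what the comment in `infer_async` guards against: if the device completes the transfer immediately, `decrease()` runs on another thread before the launching call returns, and the counter must never dip below the number of requests actually in flight.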
diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp index 98b8f987..87b4a635 100644 --- a/hailort/libhailort/src/network_group/network_group_internal.hpp +++ b/hailort/libhailort/src/network_group/network_group_internal.hpp @@ -32,10 +32,11 @@ #include "common/latency_meter.hpp" -#include "hef/hef_internal.hpp" #include "core_op/active_core_op_holder.hpp" #include "core_op/core_op.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" + #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/hailort_rpc_client.hpp" #include "rpc/rpc_definitions.hpp" @@ -198,10 +199,12 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup virtual Expected> get_layer_info(const std::string &stream_name) override; virtual Expected> get_ops_metadata() override; + Expected get_op_meta_data(const std::string &edge_name); virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; Expected> get_nms_meta_data(const std::string &edge_name); private: @@ -221,10 +224,10 @@ class ConfiguredNetworkGroupBase : public ConfiguredNetworkGroup bool m_is_shutdown = false; bool m_is_forked; - std::mutex m_shutdown_mutex; + std::mutex m_mutex; friend class VDeviceCoreOp; - friend class PipelineBuilder; + friend class AsyncPipelineBuilder; }; // Move client ng to different header @@ -322,12 +325,15 @@ class ConfiguredNetworkGroupClient : public ConfiguredNetworkGroup const std::function &infer_request_done_cb) override; hailo_status execute_callback(const ProtoCallbackIdentifier &cb_id); + void execute_callbacks_on_error(hailo_status error_status); + virtual Expected> get_layer_info(const std::string &stream_name) override; virtual Expected> get_ops_metadata() override; virtual hailo_status set_nms_score_threshold(const std::string &edge_name, float32_t nms_score_threshold) override; virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; + virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; private: ConfiguredNetworkGroupClient(NetworkGroupIdentifier &&identifier, const std::string &network_group_name); diff --git a/hailort/libhailort/src/os/driver_scan.hpp b/hailort/libhailort/src/os/driver_scan.hpp deleted file mode 100644 index c8620aa5..00000000 --- a/hailort/libhailort/src/os/driver_scan.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file driver_scan.hpp - * @brief Get list and parse pcie driver info - **/ - -#include "os/hailort_driver.hpp" - -namespace hailort -{ - -Expected> list_devices(); -#ifndef __QNX__ -Expected query_device_info(const std::string &device_name); -#else // __QNX__ -Expected query_device_info(const std::string &device_name, uint32_t index); -#endif // __QNX__ - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/microsec_timer.hpp b/hailort/libhailort/src/os/microsec_timer.hpp index 87f70bcd..7d4ce71d 100644 --- a/hailort/libhailort/src/os/microsec_timer.hpp +++ b/hailort/libhailort/src/os/microsec_timer.hpp @@ -32,6 +32,11 @@ class MicrosecTimer final * @note This function is guaranteed to sleep for at least the desired time, though it may sleep for more. */ static void sleep(uint64_t microsecs); + + static void sleep(std::chrono::microseconds microsecs) + { + sleep(microsecs.count()); + } }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/CMakeLists.txt b/hailort/libhailort/src/os/posix/CMakeLists.txt index 2aa2e8ae..b8fd6eea 100644 --- a/hailort/libhailort/src/os/posix/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/CMakeLists.txt @@ -10,7 +10,6 @@ set(files ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt index cffd810c..779505b3 100644 --- a/hailort/libhailort/src/os/posix/linux/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/linux/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.0.0) set(files - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp new file mode 100644 index 00000000..0f9a7166 --- /dev/null +++ b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp @@ -0,0 +1,79 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on Linux + **/ +#include <sys/mman.h> +#include <sys/ioctl.h> +#include <linux/dma-buf.h> + + +#include "hailo/hailort.h" +#include "hailo/event.hpp" +#include "common/utils.hpp" +#include "utils/dma_buffer_utils.hpp" + +namespace hailort +{ + +Expected DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer) +{ + void* dma_buf_ptr = mmap(NULL, dma_buffer.size, PROT_WRITE, MAP_SHARED, dma_buffer.fd, 0); + CHECK_AS_EXPECTED(MAP_FAILED != dma_buf_ptr, HAILO_INTERNAL_FAILURE, "Failed to run mmap on DMA buffer for writing"); + + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK_AS_EXPECTED(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + return MemoryView(dma_buf_ptr, dma_buffer.size); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview) +{ + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE, + }; + + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + err = munmap(dma_buffer_memview.data(), dma_buffer.size); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to munmap dma buffer, errno {}", err); + + return HAILO_SUCCESS; +} + +Expected DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer) +{ + void* dma_buf_ptr = mmap(NULL, dma_buffer.size, PROT_READ, MAP_SHARED, dma_buffer.fd, 0); + CHECK_AS_EXPECTED(MAP_FAILED != dma_buf_ptr, HAILO_INTERNAL_FAILURE, "Failed to run mmap on DMA buffer for reading"); + + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK_AS_EXPECTED(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + return MemoryView(dma_buf_ptr, dma_buffer.size); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview) +{ + struct dma_buf_sync sync = { + .flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ, + }; + auto err = ioctl(dma_buffer.fd, DMA_BUF_IOCTL_SYNC, &sync); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to run DMA_BUF_IOCTL_SYNC ioctl, errno {}", err); + + err = munmap(dma_buffer_memview.data(), dma_buffer.size); + CHECK(0 == err, HAILO_INTERNAL_FAILURE, "Failed to unmap dma buffer, errno {}", err); + + return HAILO_SUCCESS; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/linux/event.cpp b/hailort/libhailort/src/os/posix/linux/event.cpp index 3e1e9477..b12d0d32 100644 --- a/hailort/libhailort/src/os/posix/linux/event.cpp +++ b/hailort/libhailort/src/os/posix/linux/event.cpp @@ -180,14 +180,15 @@ Expected Semaphore::create(uint32_t initial_count) return Semaphore(handle); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (-1 == handle) { - return nullptr; - } + CHECK_AS_EXPECTED(-1 != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow(handle); + auto res = make_shared_nothrow(handle); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal()
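The Linux `DmaBufferUtils` above bracket every CPU access to a dma-buf with paired `DMA_BUF_IOCTL_SYNC` begin/end calls, so CPU caches stay coherent with the device. A short usage sketch under stated assumptions (`fill_input_buffer` is a hypothetical helper, not part of the patch; `dma_buffer` is assumed to already hold a valid fd and size obtained elsewhere):

#include <cstddef>
#include <cstring>

#include "hailo/hailort.h"
#include "utils/dma_buffer_utils.hpp"

using namespace hailort;

// Copies host data into a dma-buf. DmaBufferUtils issues the
// DMA_BUF_SYNC_START / DMA_BUF_SYNC_END ioctls around the CPU access.
static hailo_status fill_input_buffer(hailo_dma_buffer_t dma_buffer, const void *data, size_t size)
{
    if (size > dma_buffer.size) {
        return HAILO_INVALID_ARGUMENT;
    }

    // Maps with PROT_WRITE and opens a write-sync window.
    auto mapped = DmaBufferUtils::mmap_dma_buffer_write(dma_buffer);
    if (!mapped.has_value()) {
        return mapped.status();
    }

    std::memcpy(mapped.value().data(), data, size);

    // Closes the write-sync window and unmaps; the device may now read the buffer.
    return DmaBufferUtils::munmap_dma_buffer_write(dma_buffer, mapped.value());
}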
--git a/hailort/libhailort/src/os/posix/mmap_buffer.cpp b/hailort/libhailort/src/os/posix/mmap_buffer.cpp index 09391182..6144f05e 100644 --- a/hailort/libhailort/src/os/posix/mmap_buffer.cpp +++ b/hailort/libhailort/src/os/posix/mmap_buffer.cpp @@ -8,7 +8,7 @@ **/ #include "os/mmap_buffer.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "hailo_ioctl_common.h" #include diff --git a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt index cffd810c..779505b3 100644 --- a/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt +++ b/hailort/libhailort/src/os/posix/qnx/CMakeLists.txt @@ -1,8 +1,8 @@ cmake_minimum_required(VERSION 3.0.0) set(files - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${files} PARENT_SCOPE) diff --git a/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp new file mode 100644 index 00000000..4937a7f4 --- /dev/null +++ b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp @@ -0,0 +1,38 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on QNX (not supported) + **/ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "utils/dma_buffer_utils.hpp" + + +namespace hailort +{ + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/qnx/event.cpp b/hailort/libhailort/src/os/posix/qnx/event.cpp index 312b6991..855cdf2e 100644 --- a/hailort/libhailort/src/os/posix/qnx/event.cpp +++ b/hailort/libhailort/src/os/posix/qnx/event.cpp @@ -120,14 +120,15 @@ Expected<Semaphore> Semaphore::create(uint32_t initial_count) return std::move(Semaphore(handle, initial_count)); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected<SemaphorePtr> Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (INVALID_EVENT_HANDLE == handle) { - return nullptr; - } + CHECK(INVALID_EVENT_HANDLE != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow<Semaphore>(handle, initial_count); + auto res = make_shared_nothrow<Semaphore>(handle, initial_count); + CHECK_NOT_NULL(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal() diff --git a/hailort/libhailort/src/os/windows/CMakeLists.txt b/hailort/libhailort/src/os/windows/CMakeLists.txt index bba4bed1..c7ad1a20 100644 --- a/hailort/libhailort/src/os/windows/CMakeLists.txt +++ b/hailort/libhailort/src/os/windows/CMakeLists.txt @@ -4,9 +4,8 @@ set(files ${CMAKE_CURRENT_SOURCE_DIR}/microsec_timer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/file_descriptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/mmap_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hailort_driver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/event.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/driver_scan.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_buffer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/virtual_alloc_guard.cpp ) diff --git a/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp new file mode 100644 index 00000000..b8404bd4 --- /dev/null +++ b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file dma_buffer_utils.cpp + * @brief A module for managing DMA buffers on Windows (not supported) + **/ + +#include "utils/dma_buffer_utils.hpp" + + +namespace hailort +{ + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_write(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +Expected<MemoryView> DmaBufferUtils::mmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/) +{ + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status DmaBufferUtils::munmap_dma_buffer_read(hailo_dma_buffer_t /*dma_buffer*/, MemoryView /*dma_buffer_memview*/) +{ + return HAILO_NOT_IMPLEMENTED; +} + +} /* namespace hailort */ diff --git a/hailort/libhailort/src/os/windows/event.cpp b/hailort/libhailort/src/os/windows/event.cpp index 3512631d..9cf851f6 100644 --- a/hailort/libhailort/src/os/windows/event.cpp +++ b/hailort/libhailort/src/os/windows/event.cpp @@ -129,14 +129,15 @@ Expected<Semaphore> Semaphore::create(uint32_t initial_count) return std::move(Semaphore(handle)); } -SemaphorePtr Semaphore::create_shared(uint32_t initial_count) +Expected<SemaphorePtr> Semaphore::create_shared(uint32_t initial_count) { const auto handle = open_semaphore_handle(initial_count); - if (nullptr == handle) { - return nullptr; - } + CHECK_AS_EXPECTED(nullptr != handle, HAILO_EVENT_CREATE_FAIL); - return make_shared_nothrow<Semaphore>(handle); + auto res = make_shared_nothrow<Semaphore>(handle); + CHECK_NOT_NULL_AS_EXPECTED(res, HAILO_OUT_OF_HOST_MEMORY); + + return res; } hailo_status Semaphore::signal()
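Since Semaphore::create_shared now returns Expected<SemaphorePtr> on Linux, QNX and Windows alike, callers can distinguish a failed OS handle from an allocation failure instead of folding both into a null pointer. A hypothetical caller migration, assuming the CHECK_EXPECTED_AS_STATUS convention used elsewhere in the codebase:

    hailo_status init_shared_semaphore(SemaphorePtr &out_semaphore)
    {
        // Before the change, a null result conflated "handle creation failed" with
        // "out of memory"; now each failure propagates as its own status
        // (HAILO_EVENT_CREATE_FAIL vs. HAILO_OUT_OF_HOST_MEMORY).
        auto semaphore = Semaphore::create_shared(0);
        CHECK_EXPECTED_AS_STATUS(semaphore);
        out_semaphore = semaphore.release();
        return HAILO_SUCCESS;
    }

diff --git a/hailort/libhailort/src/os/windows/hailort_driver.cpp b/hailort/libhailort/src/os/windows/hailort_driver.cpp deleted file mode 100644 index f614f17e..00000000 --- a/hailort/libhailort/src/os/windows/hailort_driver.cpp +++ /dev/null @@ -1,947 +0,0 @@ -/** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.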
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file hailort_driver.cpp - * @brief Low level interface to PCI driver - **/ - -#include "os/windows/osdep.hpp" -#include "os/hailort_driver.hpp" -#include "os/driver_scan.hpp" -#include "common/logger_macros.hpp" -#include "common/utils.hpp" -#include "common/os/windows/string_conversion.hpp" -#include "os/mmap_buffer.hpp" -#include "../../../../drivers/win/include/Public.h" - -#pragma comment(lib, "cfgmgr32.lib") - -namespace hailort -{ - -static_assert(VDMA_CHANNELS_PER_ENGINE == MAX_VDMA_CHANNELS_PER_ENGINE, "Driver and libhailort parameters mismatch"); -static_assert(MAX_VDMA_ENGINES == MAX_VDMA_ENGINES_COUNT, "Driver and libhailort parameters mismatch"); -static_assert(MIN_D2H_CHANNEL_INDEX == VDMA_DEST_CHANNELS_START, "Driver and libhailort parameters mismatch"); - -//TODO HRT-7309: merge with posix -static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::DmaDirection direction) { - switch (direction){ - case HailoRTDriver::DmaDirection::H2D: - return HAILO_DMA_TO_DEVICE; - case HailoRTDriver::DmaDirection::D2H: - return HAILO_DMA_FROM_DEVICE; - case HailoRTDriver::DmaDirection::BOTH: - return HAILO_DMA_BIDIRECTIONAL; - } - - assert(false); - // On release build Return value that will make ioctls to fail. - return HAILO_DMA_NONE; -} - -static enum hailo_cpu_id translate_cpu_id(hailo_cpu_id_t cpu_id) -{ - switch (cpu_id) - { - case HAILO_CPU_ID_0: - return HAILO_CPU_ID_CPU0; - case HAILO_CPU_ID_1: - return HAILO_CPU_ID_CPU1; - case HAILO_CPU_ID_MAX_ENUM: - // Add label for HAILO_CPU_ID_MAX_ENUM to cover all enum cases (avoid warnings). Continue to the assert. - break; - } - - assert(false); - // On release build Return value that will make ioctls to fail. 
- return HAILO_CPU_ID_NONE; -} - -static hailo_transfer_memory_type translate_memory_type(HailoRTDriver::MemoryType memory_type) -{ - using MemoryType = HailoRTDriver::MemoryType; - switch (memory_type) { - case MemoryType::DIRECT_MEMORY: - return HAILO_TRANSFER_DEVICE_DIRECT_MEMORY; - case MemoryType::VDMA0: - return HAILO_TRANSFER_MEMORY_VDMA0; - case MemoryType::VDMA1: - return HAILO_TRANSFER_MEMORY_VDMA1; - case MemoryType::VDMA2: - return HAILO_TRANSFER_MEMORY_VDMA2; - case MemoryType::PCIE_BAR0: - return HAILO_TRANSFER_MEMORY_PCIE_BAR0; - case MemoryType::PCIE_BAR2: - return HAILO_TRANSFER_MEMORY_PCIE_BAR2; - case MemoryType::PCIE_BAR4: - return HAILO_TRANSFER_MEMORY_PCIE_BAR4; - case MemoryType::DMA_ENGINE0: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE0; - case MemoryType::DMA_ENGINE1: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE1; - case MemoryType::DMA_ENGINE2: - return HAILO_TRANSFER_MEMORY_DMA_ENGINE2; - } - - assert(false); - return HAILO_TRANSFER_MEMORY_MAX_ENUM; -} - -class CWaitable -{ -public: - ULONG Wait(ULONG millies = INFINITE) - { - return WaitForSingleObject(m_Handle, millies); - } - ~CWaitable() - { - if (m_Handle) { - CloseHandle(m_Handle); - } - } -protected: - CWaitable(HANDLE h) : m_Handle(h) { } - HANDLE m_Handle; -}; - -class CMutex : public CWaitable -{ -public: - CMutex() : CWaitable(CreateMutex(NULL, false, NULL)) { } - void Release() - { - ReleaseMutex(m_Handle); - } -}; - -class CEvent : public CWaitable -{ -public: - CEvent(bool Manual) : CWaitable(CreateEvent(NULL, Manual, false, NULL)) { } -}; - -class COverlapped : public CEvent -{ -public: - COverlapped() : CEvent(true) - { - RtlZeroMemory(&m_Overlapped, sizeof(m_Overlapped)); - m_Overlapped.hEvent = m_Handle; - } - operator LPOVERLAPPED() { return &m_Overlapped; } -protected: - OVERLAPPED m_Overlapped; -}; - -template -class CSync -{ -public: - CSync(t& obj) : m_Obj(obj) { m_Obj.Wait(); } - ~CSync() { m_Obj.Release(); } -private: - t& m_Obj; -}; -using CMutexSync = CSync; - -class CDeviceFile -{ -public: - - CDeviceFile(const std::string& path) - { - Create(path.c_str(), true); - } - void Close() - { - CMutexSync sync(m_Mutex); - if (m_Handle) { - CloseHandle(m_Handle); - m_Handle = NULL; - } - } - ~CDeviceFile() - { - Unregister(); - Close(); - } - bool Present() const - { - return m_Handle; - } - HANDLE Detach() { - CMutexSync sync(m_Mutex); - HANDLE h = m_Handle; - m_Handle = NULL; - return h; - } -protected: - bool Notify() - { - if (m_Handle) { - LOGGER__ERROR("Closing the file {}", m_InterfaceName); - } - Close(); - return true; - } - void Create(LPCSTR Name, bool Writable) - { - ULONG access = GENERIC_READ, share = FILE_SHARE_READ; - if (Writable) { - access |= GENERIC_WRITE; - } - else { - share |= FILE_SHARE_WRITE; - } - m_Handle = CreateFileA( - Name, - access, - share, - NULL, - OPEN_EXISTING, - FILE_FLAG_OVERLAPPED, - NULL); - if (m_Handle == INVALID_HANDLE_VALUE) { - m_Handle = NULL; - LOGGER__ERROR("can't open '{}'", Name); - return; - } - - if (!m_SetNotify) { - return; - } - - CM_NOTIFY_FILTER filter; - filter.cbSize = sizeof(filter); - filter.Flags = 0; - filter.FilterType = CM_NOTIFY_FILTER_TYPE_DEVICEHANDLE; - filter.u.DeviceHandle.hTarget = m_Handle; - Unregister(); - CM_Register_Notification(&filter, this, []( - _In_ HCMNOTIFICATION, - _In_opt_ PVOID Context, - _In_ CM_NOTIFY_ACTION Action, - _In_reads_bytes_(EventDataSize) PCM_NOTIFY_EVENT_DATA, - _In_ DWORD) -> DWORD - { - CDeviceFile* f = (CDeviceFile*)Context; - if (Action == CM_NOTIFY_ACTION_DEVICEQUERYREMOVE) { - return 
f->Notify() ? ERROR_SUCCESS : ERROR_CANCELLED; - } - if (Action == CM_NOTIFY_ACTION_DEVICEREMOVECOMPLETE) { - f->Notify(); - } - return ERROR_SUCCESS; - }, - &m_Notification); - } - void Unregister() - { - if (m_Notification) { - CM_Unregister_Notification(m_Notification); - m_Notification = NULL; - } - } -private: - std::string m_InterfaceName; - HCMNOTIFICATION m_Notification = NULL; - CMutex m_Mutex; - bool m_SetNotify = false; - HANDLE m_Handle = NULL; -}; - -// TODO: HRT-7309 : implement hailo_ioctl for windows -static int ioctl(HANDLE h, ULONG val, tCompatibleHailoIoctlData *ioctl_data) -{ - ioctl_data->Parameters.u.value = val; - ULONG returned; - COverlapped overlapped; - bool res = DeviceIoControl(h, HAILO_IOCTL_COMPATIBLE, ioctl_data, sizeof(*ioctl_data), - ioctl_data, sizeof(*ioctl_data), &returned, overlapped); - if (!res) { - ULONG lastError = GetLastError(); - if (lastError != ERROR_IO_PENDING) { - errno = (int)lastError; - return -1; - } - if (!GetOverlappedResult(h, overlapped, &returned, true)) { - errno = (int)GetLastError(); - return -1; - } - } - return 0; -} - -// TODO: validate wraparounds for buffer/mapping handles in the driver (HRT-9509) -const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; -const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; -const uint8_t HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL; - -static hailo_status validate_driver_version(const hailo_driver_info &driver_info) -{ - hailo_version_t library_version{}; - auto status = hailo_get_library_version(&library_version); - CHECK_SUCCESS(status); - CHECK((driver_info.major_version == library_version.major) && - (driver_info.minor_version == library_version.minor) && - (driver_info.revision_version == library_version.revision), HAILO_INVALID_DRIVER_VERSION, - "Driver version ({}.{}.{}) is different from library version ({}.{}.{})", - driver_info.major_version, driver_info.minor_version, driver_info.revision_version, - library_version.major, library_version.minor, library_version.revision); - return HAILO_SUCCESS; -} - -HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, hailo_status &status) : - m_fd(std::move(fd)), - m_device_info(device_info), - m_allocate_driver_buffer(false) -{ - tCompatibleHailoIoctlData data = {}; - hailo_driver_info& driver_info = data.Buffer.DriverInfo; - if (0 > ioctl(m_fd, HAILO_QUERY_DRIVER_INFO, &data)) { - LOGGER__ERROR("Failed to query driver info, errno {}", errno); - status = HAILO_DRIVER_FAIL; - return; - } - status = validate_driver_version(driver_info); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Driver version mismatch, status {}", status); - return; - } - - hailo_device_properties& device_properties = data.Buffer.DeviceProperties; - if (0 > ioctl(m_fd, HAILO_QUERY_DEVICE_PROPERTIES, &data)) { - LOGGER__ERROR("Failed query pcie device properties, errno {}", errno); - status = HAILO_DRIVER_FAIL; - return; - } - - m_desc_max_page_size = device_properties.desc_max_page_size; - m_dma_engines_count = device_properties.dma_engines_count; - - switch (device_properties.dma_type) { - case HAILO_DMA_TYPE_PCIE: - m_dma_type = DmaType::PCIE; - break; - case HAILO_DMA_TYPE_DRAM: - m_dma_type = DmaType::DRAM; - break; - default: - LOGGER__ERROR("Invalid dma type returned from ioctl {}", device_properties.dma_type); - status = HAILO_DRIVER_FAIL; - return; - } - - m_is_fw_loaded = device_properties.is_fw_loaded; - status = HAILO_SUCCESS; 
-} - -Expected> HailoRTDriver::scan_devices() -{ - auto device_names = list_devices(); - CHECK_EXPECTED(device_names, "Failed listing pcie devices"); - - std::vector devices_info; - for (const auto &device_name : device_names.value()) { - auto device_info = query_device_info(device_name); - CHECK_EXPECTED(device_info, "failed parsing device info for {}", device_name); - devices_info.push_back(device_info.release()); - } - return devices_info; -} - -Expected> HailoRTDriver::create(const DeviceInfo &device_info) -{ - CDeviceFile f(device_info.dev_path); - if (!f.Present()) { - LOGGER__ERROR("Failed to open board {}", device_info.dev_path); - return make_unexpected(HAILO_OPEN_FILE_FAILURE); - } - FileDescriptor fd(f.Detach()); - - hailo_status status = HAILO_UNINITIALIZED; - std::unique_ptr driver(new (std::nothrow) HailoRTDriver(device_info, std::move(fd), status)); - CHECK_NOT_NULL_AS_EXPECTED(driver, HAILO_OUT_OF_HOST_MEMORY); - CHECK_SUCCESS_AS_EXPECTED(status); - - return driver; -} - -Expected> HailoRTDriver::read_notification() -{ - tCompatibleHailoIoctlData data; - hailo_d2h_notification& notification_buffer = data.Buffer.D2HNotification; - - auto rc = ioctl(this->m_fd, HAILO_READ_NOTIFICATION, &data); - if (0 > rc) { - return make_unexpected(HAILO_DRIVER_FAIL); - } - - std::vector notification(notification_buffer.buffer_len); - memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); - return notification; -} - -hailo_status HailoRTDriver::disable_notifications() -{ - tCompatibleHailoIoctlData data = {}; - - int res = ioctl(m_fd, HAILO_DISABLE_NOTIFICATION, &data); - CHECK(0 <= res, HAILO_DRIVER_FAIL, "HAILO_DISABLE_NOTIFICATION failed with errno: {}", errno); - - return HAILO_SUCCESS; -} -hailo_status HailoRTDriver::read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - constexpr uint32_t CHUNK_SIZE = ARRAY_ENTRIES(hailo_memory_transfer_params::buffer); - uint32_t offset = 0; - - while (offset < size) { - const uint32_t actual_size = std::min(CHUNK_SIZE, static_cast(size) - offset); - auto status = read_memory_ioctl(memory_type, address + offset, - reinterpret_cast(buf) + offset, actual_size); - CHECK_SUCCESS(status); - offset += actual_size; - } - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - constexpr uint32_t CHUNK_SIZE = ARRAY_ENTRIES(hailo_memory_transfer_params::buffer); - uint32_t offset = 0; - - while (offset < size) { - const uint32_t actual_size = std::min(CHUNK_SIZE, static_cast(size) - offset); - auto status = write_memory_ioctl(memory_type, address + offset, - reinterpret_cast(buf) + offset, actual_size); - CHECK_SUCCESS(status); - offset += actual_size; - } - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to read"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Read buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; 
- } - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - tCompatibleHailoIoctlData data = {}; - hailo_memory_transfer_params& transfer = data.Buffer.MemoryTransfer; - transfer.transfer_direction = TRANSFER_READ; - transfer.memory_type = translate_memory_type(memory_type); - transfer.address = address; - transfer.count = size; - memset(transfer.buffer, 0, sizeof(transfer.buffer)); - - CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT, - "Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - - if (0 > ioctl(m_fd, HAILO_MEMORY_TRANSFER, &data)) { - LOGGER__ERROR("HailoRTDriver::read_memory failed with errno:{}", errno); - return HAILO_DRIVER_FAIL; - } - - memcpy(buf, transfer.buffer, transfer.count); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - if (size == 0) { - LOGGER__ERROR("Invalid size to write"); - return HAILO_INVALID_ARGUMENT; - } - - if (buf == nullptr) { - LOGGER__ERROR("Write buffer pointer is NULL"); - return HAILO_INVALID_ARGUMENT; - } - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - tCompatibleHailoIoctlData data = {}; - hailo_memory_transfer_params& transfer = data.Buffer.MemoryTransfer; - transfer.transfer_direction = TRANSFER_WRITE; - transfer.memory_type = translate_memory_type(memory_type); - transfer.address = address; - transfer.count = size; - memset(transfer.buffer, 0, sizeof(transfer.buffer)); - - CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT, - "Invalid size to write, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - - memcpy(transfer.buffer, buf, transfer.count); - - if (0 > ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &data)) { - LOGGER__ERROR("HailoRTDriver::write_memory failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size) -{ - CHECK_AS_EXPECTED(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK_AS_EXPECTED(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - - tCompatibleHailoIoctlData data = {}; - auto& params = data.Buffer.ChannelRegisterRead; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - params.direction = direction_to_dma_data_direction(data_direction); - params.offset = offset; - params.reg_size = reg_size; - params.data = 0; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_CHANNEL_READ_REGISTER, &data)) { - LOGGER__ERROR("HailoRTDriver::read_vdma_channel_register failed with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(params.data); -} - -hailo_status HailoRTDriver::write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size, uint32_t value) -{ - CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - - tCompatibleHailoIoctlData data = {}; - auto& params = 
data.Buffer.ChannelRegisterWrite; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - params.direction = direction_to_dma_data_direction(data_direction); - params.offset = offset; - params.reg_size = reg_size; - params.data = value; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_CHANNEL_WRITE_REGISTER, &data)) { - LOGGER__ERROR("HailoRTDriver::write_vdma_channel_register failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, - size_t offset, size_t count) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_sync_params& sync_info = data.Buffer.VdmaBufferSync; - sync_info.handle = handle; - sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE; - sync_info.offset = offset; - sync_info.count = count; - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &data)) { - LOGGER__ERROR("HAILO_VDMA_BUFFER_SYNC failed with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -} - - -hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure) -{ - CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_enable_params& params = data.Buffer.VdmaInterruptsEnable; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - params.enable_timestamps_measure = enable_timestamps_measure; - - CHECK(ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_ENABLE, &data) >= 0, HAILO_DRIVER_FAIL, - "Failed to enable vdma interrupts with errno:{}", errno); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_interrupts_disable(const ChannelsBitmap &channels_bitmap) -{ - CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_disable_params& params = data.Buffer.VdmaInterruptsDisable; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_DISABLE, &data)) { - LOGGER__ERROR("Failed to disable vdma interrupts with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -// TODO: HRT-7309 - unite with posix -static Expected create_interrupt_timestamp_list( - hailo_vdma_interrupts_read_timestamp_params &inter_data) -{ - CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL, - "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count); - ChannelInterruptTimestampList timestamp_list{}; - - timestamp_list.count = inter_data.timestamps_count; - for (size_t i = 0; i < timestamp_list.count; i++) { - timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns); - timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed; - } - return timestamp_list; -} - -static Expected to_irq_data(const hailo_vdma_interrupts_wait_params& params, - uint8_t engines_count) -{ - static_assert(ARRAY_ENTRIES(IrqData::channels_irq_data) == ARRAY_ENTRIES(params.irq_data), "Mismatch irq data size"); - CHECK_AS_EXPECTED(params.channels_count <= ARRAY_ENTRIES(params.irq_data), 
HAILO_DRIVER_FAIL, - "Invalid channels count returned from vdma_interrupts_wait"); - - IrqData irq{}; - irq.channels_count = params.channels_count; - for (uint8_t i = 0; i < params.channels_count; i++) { - const auto engine_index = params.irq_data[i].engine_index; - const auto channel_index = params.irq_data[i].channel_index; - CHECK_AS_EXPECTED(engine_index < engines_count, HAILO_DRIVER_FAIL, - "Invalid engine index {} returned from vdma_interrupts_wait, max {}", engine_index, engines_count); - CHECK_AS_EXPECTED(channel_index < MAX_VDMA_CHANNELS_PER_ENGINE, HAILO_DRIVER_FAIL, - "Invalid channel_index index {} returned from vdma_interrupts_wait", channel_index); - - irq.channels_irq_data[i].channel_id.engine_index = engine_index; - irq.channels_irq_data[i].channel_id.channel_index = channel_index; - irq.channels_irq_data[i].is_active = params.irq_data[i].is_active; - irq.channels_irq_data[i].desc_num_processed = params.irq_data[i].host_num_processed; - irq.channels_irq_data[i].host_error = params.irq_data[i].host_error; - irq.channels_irq_data[i].device_error = params.irq_data[i].device_error; - } - return irq; -} - -Expected HailoRTDriver::vdma_interrupts_wait(const ChannelsBitmap &channels_bitmap) -{ - CHECK_AS_EXPECTED(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given"); - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_wait_params& params = data.Buffer.VdmaInterruptsWait; - std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine); - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_WAIT, &data)) { - LOGGER__ERROR("Failed to wait interrupts for channels bitmap with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return to_irq_data(params, static_cast(m_dma_engines_count)); -} - -Expected HailoRTDriver::vdma_interrupts_read_timestamps(vdma::ChannelId channel_id) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_interrupts_read_timestamp_params ¶ms = data.Buffer.VdmaInterruptsReadTimestamps; - params.engine_index = channel_id.engine_index; - params.channel_index = channel_id.channel_index; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, &data)) { - LOGGER__ERROR("Failed to read channel interrupts timestamps errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return create_interrupt_timestamp_list(params); -} - -hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, const uint8_t request_md5[PCIE_EXPECTED_MD5_LENGTH], - void *response, size_t *response_len, uint8_t response_md5[PCIE_EXPECTED_MD5_LENGTH], - std::chrono::milliseconds timeout, hailo_cpu_id_t cpu_id) -{ - CHECK_ARG_NOT_NULL(request); - CHECK_ARG_NOT_NULL(response); - CHECK_ARG_NOT_NULL(response_len); - CHECK(timeout.count() >= 0, HAILO_INVALID_ARGUMENT); - - tCompatibleHailoIoctlData data = {}; - hailo_fw_control& command = data.Buffer.FirmwareControl; - static_assert(PCIE_EXPECTED_MD5_LENGTH == sizeof(command.expected_md5), "mismatch md5 size"); - memcpy(&command.expected_md5, request_md5, sizeof(command.expected_md5)); - command.buffer_len = static_cast(request_len); - CHECK(request_len <= sizeof(command.buffer), HAILO_INVALID_ARGUMENT, - "FW control request len can't be larger than {} (size given {})", sizeof(command.buffer), request_len); - memcpy(&command.buffer, request, request_len); - command.timeout_ms = static_cast(timeout.count()); - command.cpu_id = translate_cpu_id(cpu_id); - - if (0 > ioctl(this->m_fd, HAILO_FW_CONTROL, 
&data)) { - LOGGER__ERROR("HAILO_FW_CONTROL failed with errno: {}", errno); - return HAILO_FW_CONTROL_FAILURE; - } - - if (*response_len < command.buffer_len) { - LOGGER__ERROR("FW control response len needs to be at least {} (size given {})", command.buffer_len, *response_len); - *response_len = command.buffer_len; - return HAILO_INSUFFICIENT_BUFFER; - } - memcpy(response, command.buffer, command.buffer_len); - *response_len = command.buffer_len; - memcpy(response_md5, command.expected_md5, PCIE_EXPECTED_MD5_LENGTH); - - return HAILO_SUCCESS; -} - -hailo_status read_log(uint8_t *buffer, size_t buffer_size, size_t *read_bytes, hailo_cpu_id_t cpu_id) -{ - (void)buffer; - (void)buffer_size; - (void)read_bytes; - (void)cpu_id; - return HAILO_PCIE_NOT_SUPPORTED_ON_PLATFORM; -} - -Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, DmaDirection data_direction, - const vdma_mapped_buffer_driver_identifier &driver_buff_handle) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_map_params& map_user_buffer_info = data.Buffer.VdmaBufferMap; - map_user_buffer_info.user_address = user_address; - map_user_buffer_info.size = required_size; - map_user_buffer_info.data_direction = direction_to_dma_data_direction(data_direction); - map_user_buffer_info.allocated_buffer_handle = driver_buff_handle; - map_user_buffer_info.mapped_handle = 0; - - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &data)) { - LOGGER__ERROR("Failed to map user buffer with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(map_user_buffer_info.mapped_handle); -} - -hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) -{ - tCompatibleHailoIoctlData data = {}; - hailo_vdma_buffer_unmap_params& unmap_user_buffer_info = data.Buffer.VdmaBufferUnmap; - unmap_user_buffer_info.mapped_handle = handle; - if (0 > ioctl(this->m_fd, HAILO_VDMA_BUFFER_UNMAP, &data)) { - LOGGER__ERROR("Failed to unmap user buffer with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular) -{ - auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular); - CHECK_EXPECTED(handle_to_dma_address_pair); - - const auto desc_handle = handle_to_dma_address_pair->first; - const auto dma_address = handle_to_dma_address_pair->second; - - auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); - if (!user_address) { - auto status = descriptors_list_release_ioctl(desc_handle); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed releasing descriptors list, status {}", status); - // continue - } - return make_unexpected(user_address.status()); - } - - return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()}; -} - -hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) -{ - hailo_status status = HAILO_SUCCESS; - - auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); - if (HAILO_SUCCESS != unmap_status) { - LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); - status = unmap_status; - // continue - } - - auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); - if (HAILO_SUCCESS != release_status) { - LOGGER__ERROR("Descriptors list release status failed with {}", release_status); - status = release_status; - // continue - } - - 
return status; -} - -Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) -{ - tCompatibleHailoIoctlData data = {}; - hailo_desc_list_create_params& create_desc_info = data.Buffer.DescListCreate; - create_desc_info.desc_count = desc_count; - create_desc_info.is_circular = is_circular; - - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &data)) { - LOGGER__ERROR("Failed to create descriptors list with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address)); -} - -hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle) -{ - tCompatibleHailoIoctlData data = {}; - uintptr_t& release_desc_info = data.Buffer.DescListReleaseParam; - release_desc_info = desc_handle; - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &data)) { - LOGGER__ERROR("Failed to release descriptors list with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -Expected HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count) -{ - tCompatibleHailoIoctlData data = {}; - data.Buffer.DescListMmap.desc_handle = desc_handle; - data.Buffer.DescListMmap.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR; - if (0 > ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &data)) { - LOGGER__ERROR("Failed to map physical memory with errno: {}", errno); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - void *user_address = data.Buffer.DescListMmap.user_address; - return user_address; -} - -hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t ) -{ - // On windows, the unmap is done on the release ioctl - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, - uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc) -{ - tCompatibleHailoIoctlData data = {}; - hailo_desc_list_bind_vdma_buffer_params& config_info = data.Buffer.DescListBind; - config_info.buffer_handle = buffer_handle; - config_info.desc_handle = desc_handle; - config_info.desc_page_size = desc_page_size; - config_info.channel_index = channel_index; - config_info.starting_desc = starting_desc; - - if (0 > ioctl(this->m_fd, HAILO_DESC_LIST_BIND_VDMA_BUFFER, &data)) { - LOGGER__ERROR("Failed to bind vdma buffer to descriptors list with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t *read_bytes, hailo_cpu_id_t cpu_id) -{ - tCompatibleHailoIoctlData data = {}; - hailo_read_log_params& params = data.Buffer.ReadLog; - params.buffer_size = __min(buffer_size, sizeof(params.buffer)); - params.cpu_id = translate_cpu_id(cpu_id); - - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(read_bytes); - - if (0 > ioctl(this->m_fd, HAILO_READ_LOG, &data)) { - LOGGER__ERROR("Failed to read log with errno:{}", errno); - return HAILO_DRIVER_FAIL; - } - - CHECK(params.read_bytes <= sizeof(params.buffer), HAILO_DRIVER_FAIL, - "Amount of bytes read from log {} is bigger than size of buffer {}", - params.read_bytes, sizeof(params.buffer)); - - memcpy(buffer, params.buffer, params.read_bytes); - *read_bytes = params.read_bytes; - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::reset_nn_core() -{ - LOGGER__ERROR("Reset nn core is not supported over the windows driver"); - return HAILO_NOT_IMPLEMENTED; -} - -Expected 
HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size) { - (void) size; - return make_unexpected(HAILO_INVALID_OPERATION); -} - - -hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle) { - (void) buffer_handle; - return HAILO_INVALID_OPERATION; -} - -Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size) -{ - (void) size; - return make_unexpected(HAILO_INVALID_OPERATION); -} - -hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info) -{ - (void) buffer_info; - return HAILO_INVALID_OPERATION; -} - -hailo_status HailoRTDriver::mark_as_used() -{ - tCompatibleHailoIoctlData data = {}; - if (0 > ioctl(this->m_fd, HAILO_MARK_AS_IN_USE, &data)) { - LOGGER__ERROR("Failed to mark device as in use with errno: {}", errno); - return HAILO_DRIVER_FAIL; - } - if (data.Buffer.MarkAsInUse.in_use) { - return HAILO_DEVICE_IN_USE; - } - return HAILO_SUCCESS; -} - -// TODO: HRT-7309 merge with posix -bool HailoRTDriver::is_valid_channel_id(const vdma::ChannelId &channel_id) -{ - return (channel_id.engine_index < m_dma_engines_count) && (channel_id.channel_index < MAX_VDMA_CHANNELS_PER_ENGINE); -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp index 91db64d4..85acf5fa 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.cpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp @@ -11,14 +11,14 @@ #include "hef/hef_internal.hpp" #include "hailort_rpc_client.hpp" -#include "net_flow/ops/yolov8_post_process.hpp" -#include "net_flow/ops/yolox_post_process.hpp" -#include "net_flow/ops/ssd_post_process.hpp" -#include "net_flow/ops/softmax_post_process.hpp" -#include "net_flow/ops/argmax_post_process.hpp" -#include "net_flow/ops/nms_post_process.hpp" -#include "net_flow/ops/yolov5_op_metadata.hpp" -#include "net_flow/ops/yolov5_seg_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov8_op_metadata.hpp" +#include "net_flow/ops_metadata/yolox_op_metadata.hpp" +#include "net_flow/ops_metadata/ssd_op_metadata.hpp" +#include "net_flow/ops_metadata/softmax_op_metadata.hpp" +#include "net_flow/ops_metadata/argmax_op_metadata.hpp" +#include "net_flow/ops_metadata/nms_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_op_metadata.hpp" +#include "net_flow/ops_metadata/yolov5_seg_op_metadata.hpp" #include @@ -82,7 +82,12 @@ hailo_status HailoRtRpcClient::VDevice_release(const VDeviceIdentifier &identifi request.set_pid(pid); Release_Reply reply; - ClientContextWithTimeout context; + // Note: In apps with multiple devices and multiple networks, there are many mapped buffers for each device. + // Therefore, the release of the devices might take a longer time to finish un-mapping all the buffers, + // so we increase the timeout for the VDevice_release context.
+ // TODO: HRT-13274 + const std::chrono::minutes release_timeout(2); + ClientContextWithTimeout context(release_timeout); grpc::Status status = m_stub->VDevice_release(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); @@ -803,7 +808,7 @@ Expected create_yolov5seg_post_pr { auto yolov5seg_config_proto = op_metadata_proto.yolov5seg_config(); hailort::net_flow::YoloV5SegPostProcessConfig yolov5seg_post_process_config = {yolov5seg_config_proto.mask_threshold(), - yolov5seg_config_proto.layer_name()}; + yolov5seg_config_proto.max_accumulated_mask_size(), yolov5seg_config_proto.layer_name()}; return yolov5seg_post_process_config; } @@ -1051,7 +1056,7 @@ hailo_vstream_info_t deserialize_vstream_info(const ProtoVStreamInfo &info_proto hailo_nms_shape_t nms_shape = { info_proto.nms_shape().number_of_classes(), info_proto.nms_shape().max_bbox_per_class(), - info_proto.nms_shape().max_mask_size() + info_proto.nms_shape().max_accumulated_mask_size() }; info.nms_shape = nms_shape; } else { @@ -1395,6 +1400,23 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_bboxes_per_cla return static_cast(reply.status()); } +hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, + const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request request; + auto proto_identifier = request.mutable_identifier(); + ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); + request.set_edge_name(edge_name); + request.set_max_accumulated_mask_size(max_accumulated_mask_size); + + ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply reply; + ClientContextWithTimeout context; + grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name) { @@ -1454,7 +1476,6 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkG } else { proto_transfer_request.set_direction(HAILO_D2H_STREAM); } - proto_transfer_request.set_size(static_cast(std::get<2>(idx_named_buffer).size())); proto_transfer_buffers->Add(std::move(proto_transfer_request)); } request.set_infer_request_done_cb_idx(infer_request_done_cb); @@ -1462,7 +1483,7 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_infer_async(const NetworkG ClientContextWithTimeout context; grpc::Status status = m_stub->ConfiguredNetworkGroup_infer_async(&context, request, &reply); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_GRPC_STATUS(status); @@ -1488,6 +1509,8 @@ Expected HailoRtRpcClient::InputVStream_is_multi_planar(const VStreamIdent hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &identifier, const hailo_pix_buffer_t &buffer) { + CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == buffer.memory_type, HAILO_NOT_SUPPORTED, "Memory type of pix buffer must be of type USERPTR!"); + InputVStream_write_pix_Request request; auto proto_identifier = request.mutable_identifier(); VStream_convert_identifier_to_proto(identifier, 
proto_identifier); @@ -1502,7 +1525,7 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident grpc::Status status = m_stub->InputVStream_write_pix(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1521,7 +1544,7 @@ hailo_status HailoRtRpcClient::InputVStream_write(const VStreamIdentifier &ident grpc::Status status = m_stub->InputVStream_write(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1540,7 +1563,7 @@ hailo_status HailoRtRpcClient::OutputVStream_read(const VStreamIdentifier &ident grpc::Status status = m_stub->OutputVStream_read(&context, request, &reply); CHECK_GRPC_STATUS(status); assert(reply.status() < HAILO_STATUS_COUNT); - if (reply.status() == HAILO_STREAM_ABORTED_BY_USER) { + if (reply.status() == HAILO_STREAM_ABORT) { return static_cast(reply.status()); } CHECK_SUCCESS(static_cast(reply.status())); @@ -1921,6 +1944,21 @@ hailo_status HailoRtRpcClient::OutputVStream_set_nms_max_proposals_per_class(con return static_cast(reply.status()); } +hailo_status HailoRtRpcClient::OutputVStream_set_nms_max_accumulated_mask_size(const VStreamIdentifier &identifier, uint32_t max_accumulated_mask_size) +{ + VStream_set_nms_max_accumulated_mask_size_Request request; + auto proto_identifier = request.mutable_identifier(); + VStream_convert_identifier_to_proto(identifier, proto_identifier); + request.set_max_accumulated_mask_size(max_accumulated_mask_size); + + ClientContextWithTimeout context; + VStream_set_nms_max_accumulated_mask_size_Reply reply; + grpc::Status status = m_stub->OutputVStream_set_nms_max_accumulated_mask_size(&context, request, &reply); + CHECK_GRPC_STATUS(status); + assert(reply.status() < HAILO_STATUS_COUNT); + return static_cast(reply.status()); +} + void HailoRtRpcClient::VDevice_convert_identifier_to_proto(const VDeviceIdentifier &identifier, ProtoVDeviceIdentifier *proto_identifier) { proto_identifier->set_vdevice_handle(identifier.m_vdevice_handle); diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp index 7be961d5..2f83319d 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.hpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp @@ -40,9 +40,9 @@ using callback_idx_t = uint32_t; class ClientContextWithTimeout : public grpc::ClientContext { public: - ClientContextWithTimeout() + ClientContextWithTimeout(const std::chrono::milliseconds context_timeout = CONTEXT_TIMEOUT) { - set_deadline(std::chrono::system_clock::now() + CONTEXT_TIMEOUT); + set_deadline(std::chrono::system_clock::now() + context_timeout); } }; @@ -98,6 +98,7 @@ class HailoRtRpcClient final { hailo_status ConfiguredNetworkGroup_set_nms_score_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t nms_score_th); hailo_status ConfiguredNetworkGroup_set_nms_iou_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t iou_th); hailo_status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(const NetworkGroupIdentifier &identifier, const std::string 
&edge_name, uint32_t max_bboxes); + hailo_status ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_accumulated_mask_size); Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name); Expected<std::vector<std::string>> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(const NetworkGroupIdentifier &identifier, const std::string &stream_name); hailo_status ConfiguredNetworkGroup_infer_async(const NetworkGroupIdentifier &identifier, @@ -150,6 +151,7 @@ class HailoRtRpcClient final { hailo_status OutputVStream_set_nms_score_threshold(const VStreamIdentifier &identifier, float32_t threshold); hailo_status OutputVStream_set_nms_iou_threshold(const VStreamIdentifier &identifier, float32_t threshold); hailo_status OutputVStream_set_nms_max_proposals_per_class(const VStreamIdentifier &identifier, uint32_t max_proposals_per_class); + hailo_status OutputVStream_set_nms_max_accumulated_mask_size(const VStreamIdentifier &identifier, uint32_t max_accumulated_mask_size); private: void VDevice_convert_identifier_to_proto(const VDeviceIdentifier &identifier, ProtoVDeviceIdentifier *proto_identifier); diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp index 89b32ecd..06c9dbd2 100644 --- a/hailort/libhailort/src/service/network_group_client.cpp +++ b/hailort/libhailort/src/service/network_group_client.cpp @@ -14,7 +14,7 @@ #include "common/os_utils.hpp" #include "network_group/network_group_internal.hpp" -#include "net_flow/pipeline/vstream_internal.hpp" +#include "net_flow/pipeline/vstream_builder.hpp" #include "net_flow/ops/nms_post_process.hpp" #include "rpc_client_utils.hpp" @@ -78,6 +78,11 @@ ConfiguredNetworkGroupClient::~ConfiguredNetworkGroupClient() if (reply != HAILO_SUCCESS) { LOGGER__CRITICAL("ConfiguredNetworkGroup_release failed with status: {}", reply); } + execute_callbacks_on_error(HAILO_INTERNAL_FAILURE); // At this point there shouldn't be any callbacks left. If there are any, raise HAILO_INTERNAL_FAILURE + auto status = wait_for_ongoing_callbacks_count_under(1); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to wait for callbacks to finish"); + } } hailo_status ConfiguredNetworkGroupClient::before_fork() @@ -212,7 +217,12 @@ hailo_status ConfiguredNetworkGroupClient::wait_for_activation(const std::chrono hailo_status ConfiguredNetworkGroupClient::shutdown() { - return m_client->ConfiguredNetworkGroup_shutdown(m_identifier); + auto status = m_client->ConfiguredNetworkGroup_shutdown(m_identifier); + CHECK_SUCCESS(status, "Failed to shutdown"); + status = wait_for_ongoing_callbacks_count_under(1); + CHECK_SUCCESS(status, "Failed to wait for callbacks to finish"); + + return status; } Expected<std::vector<std::vector<std::string>>> ConfiguredNetworkGroupClient::get_output_vstream_groups() @@ -413,6 +423,11 @@ hailo_status ConfiguredNetworkGroupClient::set_nms_max_bboxes_per_class(const st return m_client->ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(m_identifier, edge_name, max_bboxes_per_class); } +hailo_status ConfiguredNetworkGroupClient::set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) +{ + return m_client->ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(m_identifier, edge_name, max_accumulated_mask_size); +} + hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackIdentifier &cb_id) { if (cb_id.cb_type() == CALLBACK_TYPE_TRANSFER) { @@ -427,6 +442,19 @@ hailo_status ConfiguredNetworkGroupClient::execute_callback(const ProtoCallbackI return HAILO_SUCCESS; } +void ConfiguredNetworkGroupClient::execute_callbacks_on_error(hailo_status error_status) +{ + std::unique_lock<std::mutex> lock(m_mutex); + for (auto cb_pair : m_idx_to_callbacks) { + std::get<2>(*cb_pair.second)(error_status); + } + m_idx_to_callbacks.clear(); + for (auto cb_pair : m_infer_request_idx_to_callbacks) { + cb_pair.second(error_status); + } + m_infer_request_idx_to_callbacks.clear(); +} + hailo_status ConfiguredNetworkGroupClient::execute_infer_request_callback(const ProtoCallbackIdentifier &cb_id) { std::function<void(hailo_status)> cb; @@ -485,7 +513,7 @@ hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbac } auto infer_request_callback = [this, infer_request_done_cb](hailo_status status){ - if (status == HAILO_STREAM_ABORTED_BY_USER) { + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); } else if (status != HAILO_SUCCESS) { @@ -503,10 +531,22 @@ hailo_status ConfiguredNetworkGroupClient::infer_async(const NamedBuffersCallbac m_infer_request_idx_to_callbacks.emplace(infer_request_cb_idx, infer_request_callback); } - increase_ongoing_callbacks(); + increase_ongoing_callbacks(); // Increase before launch, as the cb may be called before we get the chance to increase the counter auto status = m_client->ConfiguredNetworkGroup_infer_async(m_identifier, cb_idx_to_stream_buffer, infer_request_cb_idx, m_input_streams_names); - if (status == HAILO_STREAM_ABORTED_BY_USER) { + + if (HAILO_SUCCESS != status) { + // If we got an error in `infer_async()`, then the callbacks will not be called in the service domain. + // Remove them from the cb lists so they won't be called in the client domain as well.
+ std::unique_lock<std::mutex> lock(m_mutex); + for (auto &pair : cb_idx_to_stream_buffer) { + m_idx_to_callbacks.erase(std::get<0>(pair)); + } + m_infer_request_idx_to_callbacks.erase(infer_request_cb_idx); + decrease_ongoing_callbacks(); + } + + if (status == HAILO_STREAM_ABORT) { LOGGER__INFO("Infer request was aborted by user"); return status; } diff --git a/hailort/libhailort/src/stream_common/async_stream_base.cpp b/hailort/libhailort/src/stream_common/async_stream_base.cpp index 0968aee8..e2abd420 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.cpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.cpp @@ -172,7 +172,7 @@ hailo_status AsyncInputStreamBase::write_async(TransferRequest &&transfer_reques std::unique_lock<std::mutex> lock(m_stream_mutex); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } else if (!m_is_stream_activated) { return HAILO_STREAM_NOT_ACTIVATED; } @@ -187,6 +187,12 @@ hailo_status AsyncInputStreamBase::activate_stream() auto status = activate_stream_impl(); CHECK_SUCCESS(status); + // If the mode is set to OWNING, it means we use the write/write_impl API. We want to make sure the buffer starts + // from the beginning of the buffer pool (to avoid unnecessary buffer bindings). + if (StreamBufferMode::OWNING == m_buffer_mode) { + m_buffer_pool->reset_pointers(); + } + m_is_stream_activated = true; return HAILO_SUCCESS; @@ -231,7 +237,7 @@ hailo_status AsyncInputStreamBase::call_write_async_impl(TransferRequest &&trans auto status = write_async_impl(std::move(transfer_request)); - if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORT == status)) { return status; } CHECK_SUCCESS(status); @@ -307,7 +313,7 @@ hailo_status AsyncOutputStreamBase::read_async(TransferRequest &&transfer_reques std::unique_lock<std::mutex> lock(m_stream_mutex); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } else if (!m_is_stream_activated) { return HAILO_STREAM_NOT_ACTIVATED; } @@ -329,7 +335,7 @@ hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&trans }; auto status = read_async_impl(std::move(transfer_request)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return status; } CHECK_SUCCESS(status); @@ -464,7 +470,7 @@ hailo_status AsyncOutputStreamBase::read_impl(MemoryView user_buffer) CHECK_SUCCESS(status); status = dequeue_and_launch_transfer(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // The buffer_pool state will reset on next activation. return status; } @@ -479,7 +485,7 @@ hailo_status AsyncOutputStreamBase::dequeue_and_launch_transfer() CHECK_EXPECTED_AS_STATUS(buffer); auto callback = [this, buffer=buffer.value()](hailo_status status) { - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // On deactivation flow, we should get this status. We just ignore the callback here, and in the next // activation we should reset the buffers. return; @@ -492,7 +498,7 @@ }; auto status = call_read_async_impl(TransferRequest(std::move(buffer.value()), callback)); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { // The buffer_pool state will reset on next activation.
return status; } diff --git a/hailort/libhailort/src/stream_common/async_stream_base.hpp b/hailort/libhailort/src/stream_common/async_stream_base.hpp index 48640a31..739bfef6 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.hpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.hpp @@ -66,7 +66,7 @@ class AsyncInputStreamBase : public InputStreamBase { const auto wait_done = m_has_ready_buffer.wait_for(lock, timeout, [this, pred, &status] { if (m_is_aborted) { - status = HAILO_STREAM_ABORTED_BY_USER; + status = HAILO_STREAM_ABORT; return true; } @@ -151,7 +151,7 @@ class AsyncOutputStreamBase : public OutputStreamBase { const auto wait_done = m_has_ready_buffer.wait_for(lock, timeout, [this, pred, &status] { if (m_is_aborted) { - status = HAILO_STREAM_ABORTED_BY_USER; + status = HAILO_STREAM_ABORT; return true; } diff --git a/hailort/libhailort/src/stream_common/nms_stream.cpp b/hailort/libhailort/src/stream_common/nms_stream.cpp index 725530b2..b09be229 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.cpp +++ b/hailort/libhailort/src/stream_common/nms_stream.cpp @@ -173,7 +173,7 @@ hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStreamBase &stream, void while (true) { MemoryView buffer_view(static_cast<uint8_t*>(buffer) + offset, bbox_size); auto status = stream.read_impl(buffer_view); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } @@ -189,7 +189,7 @@ } class_bboxes_count++; - CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + CHECK(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, stream.get_info().nms_info.max_bboxes_per_class); offset += bbox_size; @@ -203,7 +203,7 @@ hailo_status NMSStreamReader::read_nms_bbox_mode(OutputStreamBase &stream, void // last class delimiter) uint64_t last_bbox = 0; auto status = stream.read_impl(MemoryView(&last_bbox, sizeof(last_bbox))); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } @@ -249,7 +249,7 @@ hailo_status NMSStreamReader::read_nms_burst_mode(OutputStreamBase &stream, void assert(offset + transfer_size <= buffer_size); current_burst = MemoryView(static_cast<uint8_t*>(buffer) + offset, transfer_size); auto status = stream.read_impl(current_burst); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { + if ((HAILO_STREAM_ABORT == status) || ((HAILO_STREAM_NOT_ACTIVATED == status))) { return status; } CHECK_SUCCESS(status, "Failed reading nms burst"); @@ -290,7 +290,7 @@ hailo_status NMSStreamReader::read_nms_burst_mode(OutputStreamBase &stream, void } class_bboxes_count++; - CHECK_IN_DEBUG(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, + CHECK(class_bboxes_count <= stream.get_info().nms_info.max_bboxes_per_class, HAILO_INTERNAL_FAILURE, "Data read from the device for the current class was size {}, max size is {}", class_bboxes_count, stream.get_info().nms_info.max_bboxes_per_class);
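The HAILO_STREAM_ABORTED_BY_USER to HAILO_STREAM_ABORT rename running through these hunks is mechanical, but any caller branching on the old enumerator has to follow it. A hypothetical call site mirroring the handling pattern the diff itself uses (abort and deactivation are expected flows, not errors):

    hailo_status launch_read(AsyncOutputStreamBase &stream, TransferRequest &&transfer_request)
    {
        auto status = stream.read_async(std::move(transfer_request));
        if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) {
            return status; // expected during abort/deactivation - propagate without logging an error
        }
        CHECK_SUCCESS(status);
        return HAILO_SUCCESS;
    }

@@ -303,17 +303,22 @@ return HAILO_SUCCESS; } -hailo_status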
NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size) +hailo_status NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size, + hailo_stream_interface_t stream_interface) { hailo_status status = HAILO_UNINITIALIZED; const auto burst_type = stream.get_layer_info().nms_info.burst_type; const bool is_burst_mode = (HAILO_BURST_TYPE_H8_BBOX != burst_type) && (HAILO_BURST_TYPE_H15_BBOX != burst_type); + // Burst mode is not supported over Ethernet - return an error in this case + CHECK(!(is_burst_mode && (HAILO_STREAM_INTERFACE_ETH == stream_interface)), HAILO_NOT_SUPPORTED, + "NMS Burst mode is not supported in Ethernet interface"); + if (is_burst_mode) { status = NMSStreamReader::read_nms_burst_mode(stream, buffer, offset, size); } else { status = NMSStreamReader::read_nms_bbox_mode(stream, buffer, offset); } - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { return status; } CHECK_SUCCESS(status, "Failed reading nms"); @@ -322,11 +327,12 @@ hailo_status NMSStreamReader::read_nms(OutputStreamBase &stream, void *buffer, s } Expected<std::shared_ptr<NmsOutputStream>> NmsOutputStream::create(std::shared_ptr<OutputStreamBase> base_stream, - const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event) + const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event, + hailo_stream_interface_t stream_interface) { auto status = HAILO_UNINITIALIZED; auto nms_stream = make_shared_nothrow<NmsOutputStream>(base_stream, edge_layer, max_queue_size, - std::move(core_op_activated_event), status); + std::move(core_op_activated_event), stream_interface, status); CHECK_NOT_NULL_AS_EXPECTED(nms_stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -387,10 +393,12 @@ hailo_status NmsOutputStream::cancel_pending_transfers() return m_base_stream->cancel_pending_transfers(); } -NmsReaderThread::NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size) : +NmsReaderThread::NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size, + hailo_stream_interface_t stream_interface) : m_base_stream(base_stream), m_queue_max_size(max_queue_size), m_should_quit(false), + m_stream_interface(stream_interface), m_worker_thread([this] { process_transfer_requests(); }) {} @@ -472,12 +480,12 @@ void NmsReaderThread::process_transfer_requests() assert(1 == transfer_request.transfer_buffers.size()); assert(0 == transfer_request.transfer_buffers[0].offset()); auto buffer = transfer_request.transfer_buffers[0].base_buffer(); - auto status = NMSStreamReader::read_nms(*m_base_stream, buffer->data(), 0, buffer->size()); + auto status = NMSStreamReader::read_nms(*m_base_stream, buffer.data(), 0, buffer.size(), m_stream_interface); - if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORTED_BY_USER == status)) { - // On both deactivation/abort, we want to send HAILO_STREAM_ABORTED_BY_USER since it is part of the callback + if ((HAILO_STREAM_NOT_ACTIVATED == status) || (HAILO_STREAM_ABORT == status)) { + // On both deactivation/abort, we want to send HAILO_STREAM_ABORT since it is part of the callback // API. - transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer_request.callback(HAILO_STREAM_ABORT); } else { transfer_request.callback(status); } @@ -490,7 +498,7 @@ void NmsReaderThread::cancel_pending_transfers() while(!m_queue.empty()) { auto transfer_request = m_queue.front(); m_queue.pop(); - transfer_request.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer_request.callback(HAILO_STREAM_ABORT); } }
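The new guard rejects burst-mode NMS reads on Ethernet before any data is read. A standalone sketch of the decision logic (helper name illustrative; the real check lives in NMSStreamReader::read_nms above):

```cpp
// Returns true when this combination of burst type and stream interface can be served.
bool nms_read_supported(hailo_nms_burst_type_t burst_type, hailo_stream_interface_t interface)
{
    // Only the plain bbox-per-transfer types bypass burst mode.
    const bool is_burst_mode = (HAILO_BURST_TYPE_H8_BBOX != burst_type) &&
        (HAILO_BURST_TYPE_H15_BBOX != burst_type);
    // Burst-mode reads are not defined over Ethernet; bbox mode works on all interfaces.
    return !(is_burst_mode && (HAILO_STREAM_INTERFACE_ETH == interface));
}
```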
diff --git a/hailort/libhailort/src/stream_common/nms_stream.hpp b/hailort/libhailort/src/stream_common/nms_stream.hpp index 4242b283..9b11ef37 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.hpp +++ b/hailort/libhailort/src/stream_common/nms_stream.hpp @@ -34,7 +34,8 @@ enum class NMSBurstState { // For an explanation of the different burst modes and types, and of the class's state machine and logic, see the cpp file. class NMSStreamReader { public: - static hailo_status read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size); + static hailo_status read_nms(OutputStreamBase &stream, void *buffer, size_t offset, size_t size, + hailo_stream_interface_t stream_interface); private: static hailo_status read_nms_bbox_mode(OutputStreamBase &stream, void *buffer, size_t offset); static hailo_status read_nms_burst_mode(OutputStreamBase &stream, void *buffer, size_t offset, size_t buffer_size); @@ -46,7 +47,8 @@ class NMSStreamReader { class NmsReaderThread final { public: - NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size); + NmsReaderThread(std::shared_ptr<OutputStreamBase> base_stream, size_t max_queue_size, + hailo_stream_interface_t stream_interface); ~NmsReaderThread(); NmsReaderThread(const NmsReaderThread &) = delete; @@ -71,6 +73,7 @@ class NmsReaderThread final { std::queue<TransferRequest> m_queue; // m_should_quit is used to quit the thread (called on destruction) bool m_should_quit; + hailo_stream_interface_t m_stream_interface; std::thread m_worker_thread; }; @@ -80,15 +83,16 @@ class NmsReaderThread final { class NmsOutputStream : public AsyncOutputStreamBase { public: static Expected<std::shared_ptr<NmsOutputStream>> create(std::shared_ptr<OutputStreamBase> base_stream, - const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event); + const LayerInfo &edge_layer, size_t max_queue_size, EventPtr core_op_activated_event, + hailo_stream_interface_t stream_interface); virtual hailo_stream_interface_t get_interface() const override; NmsOutputStream(std::shared_ptr<OutputStreamBase> base_stream, const LayerInfo &edge_layer, size_t max_queue_size, - EventPtr core_op_activated_event, hailo_status &status) : + EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, hailo_status &status) : AsyncOutputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_base_stream(base_stream), - m_reader_thread(base_stream, max_queue_size) + m_reader_thread(base_stream, max_queue_size, stream_interface) {} void set_vdevice_core_op_handle(vdevice_core_op_handle_t core_op_handle) override; diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp index e93de5af..3aadb450 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp @@ -31,7 +31,7 @@ QueuedStreamBufferPool::QueuedStreamBufferPool(std::vector<BufferPtr> &&storage) m_storage(std::move(storage)) { for (auto buffer : m_storage) { - m_queue.push(buffer); + m_queue.push(MemoryView(*buffer)); } } @@ -40,6 +40,15 @@ size_t QueuedStreamBufferPool::max_queue_size()
const return m_storage.size(); } +hailo_status QueuedStreamBufferPool::dma_map(VDevice &vdevice, hailo_dma_buffer_direction_t direction) +{ + for (auto &buffer : m_storage) { + TRY(auto mapping, DmaMappedBuffer::create(vdevice, buffer->data(), buffer->size(), direction)); + m_dma_mappings.emplace_back(std::move(mapping)); + } + return HAILO_SUCCESS; +} + Expected<TransferBuffer> QueuedStreamBufferPool::dequeue() { CHECK_AS_EXPECTED(!m_queue.empty(), HAILO_INTERNAL_FAILURE, "QueuedStreamBufferPool is empty"); @@ -53,7 +62,7 @@ hailo_status QueuedStreamBufferPool::enqueue(TransferBuffer &&buffer_info) { CHECK(buffer_info.offset() == 0, HAILO_INTERNAL_FAILURE, "Cant use offset on queued buffer pool"); CHECK(buffer_info.size() == m_storage[0]->size(), HAILO_INTERNAL_FAILURE, "Invalid enqueue buffer size"); - CHECK(buffer_info.base_buffer()->data() == m_storage[m_next_enqueue_buffer_index]->data(), HAILO_INTERNAL_FAILURE, + CHECK(buffer_info.base_buffer().data() == m_storage[m_next_enqueue_buffer_index]->data(), HAILO_INTERNAL_FAILURE, "Out of order enqueue for queued stream buffer pool"); m_queue.push(buffer_info.base_buffer()); @@ -70,7 +79,7 @@ void QueuedStreamBufferPool::reset_pointers() // Now fill the buffers from the storage in the right order for (auto buffer : m_storage) { - m_queue.push(buffer); + m_queue.push(MemoryView(*buffer)); } m_next_enqueue_buffer_index = 0; } diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp index 373206d6..263a2034 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp @@ -11,6 +11,7 @@ #define _HAILO_QUEUED_STREAM_BUFFER_POOL_HPP_ #include "stream_common/stream_buffer_pool.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include <queue> @@ -24,6 +25,8 @@ class QueuedStreamBufferPool : public StreamBufferPool { explicit QueuedStreamBufferPool(std::vector<BufferPtr> &&storage); + hailo_status dma_map(VDevice &vdevice, hailo_dma_buffer_direction_t direction); + virtual size_t max_queue_size() const override; virtual Expected<TransferBuffer> dequeue() override; virtual hailo_status enqueue(TransferBuffer &&buffer_info) override; @@ -33,7 +36,10 @@ class QueuedStreamBufferPool : public StreamBufferPool { // Hold the buffer storage, keeps all buffers alive. std::vector<BufferPtr> m_storage; - std::queue<BufferPtr> m_queue; + // Keeps mappings alive (only if dma_map was called). + std::vector<DmaMappedBuffer> m_dma_mappings; + + std::queue<MemoryView> m_queue; // Used for buffer enqueue order validation. size_t m_next_enqueue_buffer_index = 0;
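dma_map pre-maps every buffer in the pool exactly once, so the per-transfer path can reuse cached mappings instead of pinning memory on each write. A hedged usage sketch (buffer allocation and the surrounding stream setup are assumed):

```cpp
// buffers: std::vector<hailort::BufferPtr> of frame-size, dma-able Buffers (allocation not shown).
hailort::QueuedStreamBufferPool pool(std::move(buffers));

// Map the whole pool up front; m_dma_mappings keeps the DmaMappedBuffer objects
// (and therefore the mappings) alive for the pool's lifetime.
auto status = pool.dma_map(vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D);
if (HAILO_SUCCESS != status) {
    // handle mapping failure
}
// dequeue()/enqueue() then cycle the buffers in a fixed order; reset_pointers()
// rewinds the queue to storage order on stream (re)activation.
```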
diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.cpp b/hailort/libhailort/src/stream_common/remote_process_stream.cpp index f231a172..4ad2b504 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.cpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.cpp @@ -221,8 +221,8 @@ hailo_status RemoteProcessInputStream::flush() // Get available buffer. We don't use the buffer, just use it to send flush request auto write_buffer = m_buffer_pool->dequeue_host_buffer(flush_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == write_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == write_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(write_buffer); @@ -235,8 +235,8 @@ hailo_status RemoteProcessInputStream::flush() // Now wait until available buffers is full status = m_buffer_pool->wait_until_host_queue_full(flush_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == status) { + return HAILO_STREAM_ABORT; } CHECK_SUCCESS(status); @@ -262,8 +262,8 @@ hailo_status RemoteProcessInputStream::write_impl(const MemoryView &buffer) { // Get available buffer auto write_buffer = m_buffer_pool->dequeue_host_buffer(m_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == write_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == write_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(write_buffer); @@ -300,8 +300,15 @@ RemoteProcessInputStream::RemoteProcessInputStream(std::shared_ptr<InputStreamBase> base_stream, - auto queue_size_exp = m_base_stream->get_async_max_queue_size(); - const auto queue_size = queue_size_exp ? *queue_size_exp : DEFAULT_QUEUE_SIZE; + auto queue_size = DEFAULT_QUEUE_SIZE; + if (HAILO_STREAM_INTERFACE_ETH != m_base_stream->get_interface() && HAILO_STREAM_INTERFACE_MIPI != m_base_stream->get_interface()) { + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); + if (!queue_size_exp) { + status = queue_size_exp.status(); + return; + } + queue_size = *queue_size_exp; + } auto buffer_pool = RemoteProcessBufferPool::create(HAILO_H2D_STREAM, base_stream->get_frame_size(), queue_size); if (!buffer_pool) { @@ -332,7 +339,7 @@ void RemoteProcessInputStream::run_write_thread() } status = write_single_buffer(); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { continue; } else if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failure on read thread {}", status); @@ -473,8 +480,8 @@ hailo_status RemoteProcessOutputStream::cancel_pending_transfers() hailo_status RemoteProcessOutputStream::read_impl(MemoryView buffer) { auto read_buffer = m_buffer_pool->dequeue_host_buffer(m_timeout); - if (HAILO_STREAM_ABORTED_BY_USER == read_buffer.status()) { - return HAILO_STREAM_ABORTED_BY_USER; + if (HAILO_STREAM_ABORT == read_buffer.status()) { + return HAILO_STREAM_ABORT; } CHECK_EXPECTED_AS_STATUS(read_buffer); @@ -509,8 +516,15 @@ RemoteProcessOutputStream::RemoteProcessOutputStream(std::shared_ptr<OutputStreamBase> base_stream, - auto queue_size_exp = m_base_stream->get_async_max_queue_size(); - auto queue_size = queue_size_exp ?
*queue_size_exp : DEFAULT_QUEUE_SIZE; + auto queue_size = DEFAULT_QUEUE_SIZE; + if (HAILO_STREAM_INTERFACE_ETH != m_base_stream->get_interface() && HAILO_STREAM_INTERFACE_MIPI != m_base_stream->get_interface()) { + auto queue_size_exp = m_base_stream->get_async_max_queue_size(); + if (!queue_size_exp) { + status = queue_size_exp.status(); + return; + } + queue_size = *queue_size_exp; + } auto buffer_pool = RemoteProcessBufferPool::create(HAILO_D2H_STREAM, base_stream->get_frame_size(), queue_size); if (!buffer_pool) { @@ -544,7 +558,7 @@ void RemoteProcessOutputStream::run_read_thread() } status = read_single_buffer(); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { continue; } else if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failure on read thread {}", status); diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.hpp b/hailort/libhailort/src/stream_common/remote_process_stream.hpp index 2f6ccc78..d7087dcf 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.hpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.hpp @@ -12,6 +12,7 @@ #ifndef _HAILO_REMOTE_PROCESS_STREAM_HPP_ #define _HAILO_REMOTE_PROCESS_STREAM_HPP_ +#include "common/event_internal.hpp" #include "common/fork_support.hpp" #include "stream_common/stream_internal.hpp" @@ -77,7 +78,7 @@ class RemoteProcessBufferPool final : public SharedAllocatedObject { }); CHECK(done, HAILO_TIMEOUT, "Timeout waiting on cond variable"); if (m_is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/stream_common/stream.cpp b/hailort/libhailort/src/stream_common/stream.cpp index c2fa06a8..eaaa447b 100644 --- a/hailort/libhailort/src/stream_common/stream.cpp +++ b/hailort/libhailort/src/stream_common/stream.cpp @@ -31,7 +31,6 @@ hailo_status InputStream::wait_for_async_ready(size_t /* transfer_size */, std:: Expected<size_t> InputStream::get_async_max_queue_size() const { - LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); return make_unexpected(HAILO_NOT_IMPLEMENTED); } @@ -57,7 +56,6 @@ hailo_status OutputStream::wait_for_async_ready(size_t /* transfer_size */, std: Expected<size_t> OutputStream::get_async_max_queue_size() const { - LOGGER__ERROR("get_async_max_queue_size not implemented for sync API"); return make_unexpected(HAILO_NOT_IMPLEMENTED); } diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp index eb0de7bc..520ab934 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.cpp +++ b/hailort/libhailort/src/stream_common/stream_internal.cpp @@ -15,6 +15,8 @@ #include "common/logger_macros.hpp" #include "common/os_utils.hpp" +#include "utils/buffer_storage.hpp" + #include "stream_common/stream_internal.hpp" @@ -37,35 +39,18 @@ hailo_status InputStreamBase::write(const void *buffer, size_t size) return write(MemoryView::create_const(buffer, size)); } -hailo_status InputStreamBase::write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) -{ - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(buffer->data()); - CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", buffer->size(), - get_frame_size()); - - auto wrapped_callback = [buffer, user_callback](hailo_status status) { - user_callback(CompletionInfo{status,
buffer->data(), buffer->size()}); - }; - return write_async(TransferRequest(std::move(buffer), wrapped_callback)); -} - hailo_status InputStreamBase::write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) { + CHECK(!buffer.empty(), HAILO_INVALID_ARGUMENT, "Invalid buffer was passed to write_async"); CHECK(0 == (reinterpret_cast<size_t>(buffer.data()) % HailoRTCommon::HW_DATA_ALIGNMENT), HAILO_INVALID_ARGUMENT, "User address must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); + CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Write size {} must be frame size {}", + buffer.size(), get_frame_size()); - const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); - // User address is not aligned to page size - if ((0 != (reinterpret_cast<size_t>(buffer.data()) % dma_able_alignment))) { - auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(user_buffer); - return write_async(user_buffer.release(), user_callback); - } else { - auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); - return write_async(dma_able_buffer.release(), user_callback); - } + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, buffer.data(), buffer.size()}); + }; + return write_async(TransferRequest(buffer, wrapped_callback)); } hailo_status InputStreamBase::write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) @@ -131,38 +116,22 @@ hailo_status OutputStreamBase::read(void *buffer, size_t size) return read(MemoryView(buffer, size)); } -hailo_status OutputStreamBase::read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) -{ - CHECK_ARG_NOT_NULL(buffer); - CHECK_ARG_NOT_NULL(buffer->data()); - CHECK(buffer->size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer->size(), - get_frame_size()); - - auto wrapped_callback = [buffer, user_callback](hailo_status status) { - user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer->data()), buffer->size()}); - }; - return read_async(TransferRequest(std::move(buffer), wrapped_callback)); -} - hailo_status OutputStreamBase::read_async(MemoryView buffer, const TransferDoneCallback &user_callback) { CHECK_ARG_NOT_NULL(buffer.data()); CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", buffer.size(), get_frame_size()); - const auto dma_able_alignment = HailoRTCommon::DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION; - BufferPtr wrapped_buffer = nullptr; - if ((0 != (reinterpret_cast<size_t>(buffer.data()) % dma_able_alignment))) { - auto user_buffer = UserBufferStorage::create_storage_from_user_buffer(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(user_buffer); - wrapped_buffer = user_buffer.release(); - } else { - auto dma_able_buffer = DmaStorage::create_dma_able_buffer_from_user_size(const_cast<uint8_t*>(buffer.data()), buffer.size()); - CHECK_EXPECTED_AS_STATUS(dma_able_buffer); - wrapped_buffer = dma_able_buffer.release(); - } - - return read_async(wrapped_buffer, user_callback); + CHECK(!buffer.empty(), HAILO_INVALID_ARGUMENT, "Invalid buffer was passed to read_async"); + CHECK(0 == (reinterpret_cast<size_t>(buffer.data()) % HailoRTCommon::HW_DATA_ALIGNMENT), HAILO_INVALID_ARGUMENT, + "User address must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); +
CHECK(buffer.size() == get_frame_size(), HAILO_INVALID_ARGUMENT, "Read size {} must be frame size {}", + buffer.size(), get_frame_size()); + + auto wrapped_callback = [buffer, user_callback](hailo_status status) { + user_callback(CompletionInfo{status, const_cast<uint8_t*>(buffer.data()), buffer.size()}); + }; + return read_async(TransferRequest(buffer, wrapped_callback)); } hailo_status OutputStreamBase::read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp index 7e0758da..6791267e 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.hpp +++ b/hailort/libhailort/src/stream_common/stream_internal.hpp @@ -38,7 +38,6 @@ #include "hailo/hailort_common.hpp" #include "stream_common/transfer_common.hpp" -#include "hef/hef_internal.hpp" #include "device_common/control_protocol.hpp" #include "hef/layer_info.hpp" @@ -96,7 +95,6 @@ class InputStreamBase : public InputStream virtual hailo_status write_impl(const MemoryView &buffer) = 0; - virtual hailo_status write_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status write_async(const MemoryView &buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status write_async(const void *buffer, size_t size, const TransferDoneCallback &user_callback) override final; @@ -160,7 +158,6 @@ class OutputStreamBase : public OutputStream virtual hailo_status read_impl(MemoryView buffer) = 0; - virtual hailo_status read_async(BufferPtr buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status read_async(MemoryView buffer, const TransferDoneCallback &user_callback) override final; virtual hailo_status read_async(void *buffer, size_t size, const TransferDoneCallback &user_callback) override final;
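With the BufferPtr overloads gone, async I/O always goes through MemoryView: the address must be HW_DATA_ALIGNMENT-aligned, the size must equal the frame size, and the wrapped callback reports the same buffer back in CompletionInfo. A hedged usage sketch (field and callback shapes follow the aggregate built above):

```cpp
// Allocate a dma-able, frame-sized buffer (satisfies the alignment CHECKs above).
auto frame = hailort::Buffer::create(input_stream.get_frame_size(),
    hailort::BufferStorageParams::create_dma());
if (!frame) { /* allocation failed */ }

auto status = input_stream.write_async(hailort::MemoryView(frame->data(), frame->size()),
    [](const hailort::InputStream::CompletionInfo &info) {
        if (HAILO_STREAM_ABORT == info.status) {
            return; // expected during deactivation/shutdown
        }
        // the buffer is reusable from here
    });
```

Note that the caller now owns the buffer's lifetime; it must stay valid until the completion callback runs.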
diff --git a/hailort/libhailort/src/stream_common/transfer_common.cpp b/hailort/libhailort/src/stream_common/transfer_common.cpp index f181ec23..a5fd6a42 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.cpp +++ b/hailort/libhailort/src/stream_common/transfer_common.cpp @@ -8,41 +8,52 @@ #include "transfer_common.hpp" #include "vdma/memory/mapped_buffer.hpp" -#include "vdma/vdma_device.hpp" +#include "utils/buffer_storage.hpp" namespace hailort { TransferBuffer::TransferBuffer() : - m_base_buffer(nullptr), + m_base_buffer(MemoryView{}), m_size(0), m_offset(0) {} -TransferBuffer::TransferBuffer(BufferPtr base_buffer, size_t size, size_t offset) : - m_base_buffer(std::move(base_buffer)), +TransferBuffer::TransferBuffer(MemoryView base_buffer, size_t size, size_t offset) : + m_base_buffer(base_buffer), m_size(size), m_offset(offset) { - assert(m_size <= m_base_buffer->size()); - assert(m_offset < m_base_buffer->size()); + assert(m_size <= base_buffer.size()); + assert(m_offset < base_buffer.size()); } -TransferBuffer::TransferBuffer(BufferPtr base_buffer) - : TransferBuffer(base_buffer, base_buffer->size(), 0) +TransferBuffer::TransferBuffer(MemoryView base_buffer) + : TransferBuffer(base_buffer, base_buffer.size(), 0) {} -Expected<vdma::MappedBufferPtr> TransferBuffer::map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction) +Expected<vdma::MappedBufferPtr> TransferBuffer::map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction) { - CHECK_AS_EXPECTED(m_base_buffer->storage().type() == BufferStorage::Type::DMA, HAILO_INVALID_ARGUMENT, - "Buffer must be dma-able (provided buffer type {})", static_cast<int>(m_base_buffer->storage().type())); + CHECK_AS_EXPECTED(!m_mappings, HAILO_INTERNAL_FAILURE, "Buffer is already mapped"); + + vdma::DmaAbleBufferPtr dma_able_buffer; + const auto storage_key = std::make_pair(m_base_buffer.data(), m_base_buffer.size()); + if (auto storage = BufferStorageResourceManager::get_resource(storage_key)) { + auto dma_able_buffer_exp = storage->get()->get_dma_able_buffer(); + CHECK_EXPECTED(dma_able_buffer_exp); + dma_able_buffer = dma_able_buffer_exp.release(); + } else { + auto dma_able_buffer_exp = vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size()); + CHECK_EXPECTED(dma_able_buffer_exp); + dma_able_buffer = dma_able_buffer_exp.release(); + } - // Map if not already mapped - auto is_new_mapping_exp = m_base_buffer->storage().dma_map(device, to_hailo_dma_direction(direction)); - CHECK_EXPECTED(is_new_mapping_exp); + auto mapped_buffer = vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction); + CHECK_EXPECTED(mapped_buffer); - return m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); + m_mappings = mapped_buffer.value(); + return mapped_buffer; } hailo_status TransferBuffer::copy_to(MemoryView buffer) @@ -72,52 +83,24 @@ hailo_status TransferBuffer::copy_from(const MemoryView buffer) return HAILO_SUCCESS; } -hailo_status TransferBuffer::synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction) -{ - auto mapped_buffer = m_base_buffer->storage().get_dma_mapped_buffer(device.get_dev_id()); - CHECK_EXPECTED_AS_STATUS(mapped_buffer); - - auto continuous_parts = get_continuous_parts(); - - auto status = synchronize_part(*mapped_buffer, continuous_parts.first, sync_direction); - CHECK_SUCCESS(status); - - if (!continuous_parts.second.empty()) { - status = synchronize_part(*mapped_buffer, continuous_parts.second, sync_direction); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status TransferBuffer::synchronize_part(vdma::MappedBufferPtr &mapped_buffer, MemoryView continuous_part, - HailoRTDriver::DmaSyncDirection sync_direction) -{ - assert(!continuous_part.empty()); - assert(continuous_part.data() >= m_base_buffer->data()); - - return mapped_buffer->synchronize(continuous_part.data() - m_base_buffer->data(), continuous_part.size(), - sync_direction); -} - bool TransferBuffer::is_wrap_around() const { - return (m_offset + m_size) > m_base_buffer->size(); + return (m_offset + m_size) > m_base_buffer.size(); } std::pair<MemoryView, MemoryView> TransferBuffer::get_continuous_parts() { if (is_wrap_around()) { - const auto size_to_end = m_base_buffer->size() - m_offset; + const auto size_to_end = m_base_buffer.size() - m_offset; assert(size_to_end < m_size); return std::make_pair( - MemoryView(m_base_buffer->data() + m_offset, size_to_end), - MemoryView(m_base_buffer->data(), m_size - size_to_end) + MemoryView(m_base_buffer.data() + m_offset, size_to_end), + MemoryView(m_base_buffer.data(), m_size - size_to_end) ); } else { return std::make_pair( - MemoryView(m_base_buffer->data() + m_offset, m_size), + MemoryView(m_base_buffer.data() + m_offset, m_size), MemoryView() ); } diff --git a/hailort/libhailort/src/stream_common/transfer_common.hpp b/hailort/libhailort/src/stream_common/transfer_common.hpp index 795b4586..66aaf404 100644 --- a/hailort/libhailort/src/stream_common/transfer_common.hpp +++ b/hailort/libhailort/src/stream_common/transfer_common.hpp @@ -13,40 +13,33 @@ #include "hailo/stream.hpp" #include "hailo/buffer.hpp" -#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/mapped_buffer.hpp" namespace hailort { -class VdmaDevice; - // Contains buffer that can be transferred. The buffer can be circular - // It relies at [m_offset, m_base_buffer.size()) and [0, m_base_buffer.size() - m_size). class TransferBuffer final { public: TransferBuffer(); - TransferBuffer(BufferPtr base_buffer); - TransferBuffer(BufferPtr base_buffer, size_t size, size_t offset); - BufferPtr base_buffer() { return m_base_buffer; } + TransferBuffer(MemoryView base_buffer); + TransferBuffer(MemoryView base_buffer, size_t size, size_t offset); + + MemoryView base_buffer() { return m_base_buffer; } size_t offset() const { return m_offset; } size_t size() const { return m_size; } - Expected map_buffer(VdmaDevice &device, HailoRTDriver::DmaDirection direction); + Expected map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction); hailo_status copy_to(MemoryView buffer); hailo_status copy_from(const MemoryView buffer); - // Sync the buffer to the given direction, fails if the buffer is not mapped. - hailo_status synchronize(VdmaDevice &device, HailoRTDriver::DmaSyncDirection sync_direction); - private: - // Sync a signal continuous part - hailo_status synchronize_part(vdma::MappedBufferPtr &mapped_buffer, MemoryView continuous_part, - HailoRTDriver::DmaSyncDirection sync_direction); - bool is_wrap_around() const; // Returns the continuous parts of the buffer. @@ -57,9 +50,12 @@ class TransferBuffer final { // 2. If the buffer is not circular, the first part will contain the buffer, the second will point to nullptr. std::pair get_continuous_parts(); - BufferPtr m_base_buffer; + MemoryView m_base_buffer; size_t m_size; size_t m_offset; + + // Once map_buffer is called, a MappedBuffer object is stored here to make sure the buffer is mapped. + vdma::MappedBufferPtr m_mappings; }; // Internal function, wrapper to the user callbacks, accepts the callback status as an argument. 
diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp index b977c466..8a82aaec 100644 --- a/hailort/libhailort/src/transform/transform.cpp +++ b/hailort/libhailort/src/transform/transform.cpp @@ -156,6 +156,16 @@ std::string TransformContextUtils::make_reorder_description(hailo_format_order_t return reorder_description.str(); } +std::string TransformContextUtils::make_pad_periph_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t dst_shape) +{ + std::stringstream reorder_description; + reorder_description << "Padding Periph shape - src_shape: (" << src_shape.height << ", " << src_shape.width << ", " + << src_shape.features << "), dst_shape: (" << dst_shape.height << ", " << dst_shape.width << ", " + << dst_shape.features << ")"; + + return reorder_description.str(); +} + std::string TransformContextUtils::make_transpose_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t transposed_shape) { std::stringstream transpose_description; @@ -1031,8 +1041,6 @@ hailo_status reorder_input_stream(const void *src_ptr, hailo_3d_image_shape_t sr if (((HAILO_FORMAT_ORDER_FCR == src_format.order) || (HAILO_FORMAT_ORDER_NHWC == src_format.order)) && (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { - //Check that there is alignment for 8 bytes - assert(0 == ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)); switch (dst_format.type) { case HAILO_FORMAT_TYPE_UINT8: transform__h2d_FCR((uint8_t*)src_ptr, &src_image_shape, (uint8_t*)dst_ptr, &dst_image_shape); @@ -1536,7 +1544,7 @@ hailo_status transform_demux_raw_frame(const void *src, uint32_t offset, } hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_shape, hailo_format_t src_format, - hailo_3d_image_shape_t dst_image_shape, hailo_format_t dst_format) + hailo_format_t dst_format) { /* Check device type */ if (!((HAILO_FORMAT_TYPE_UINT16 == dst_format.type) || (HAILO_FORMAT_TYPE_UINT8 == dst_format.type))) { @@ -1545,15 +1553,7 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh } /* Check reorder flags - where no reorder is needed */ - if ((HAILO_FORMAT_ORDER_FCR == src_format.order) && - (HAILO_FORMAT_ORDER_FCR == dst_format.order)) { - //Check that there is alignment for 8 bytes - if (0 != ((HailoRTCommon::get_data_bytes(dst_format.type) * dst_image_shape.features) % HailoRTCommon::HW_DATA_ALIGNMENT)) { - LOGGER__ERROR("HW features must be aligned to {}. passed hw features - {}", - HailoRTCommon::HW_DATA_ALIGNMENT, dst_image_shape.features); - return HAILO_INVALID_ARGUMENT; - } - } else if ((HAILO_FORMAT_ORDER_BAYER_RGB == src_format.order) && + if ((HAILO_FORMAT_ORDER_BAYER_RGB == src_format.order) && (HAILO_FORMAT_ORDER_BAYER_RGB == dst_format.order)) { if (src_image_shape.features != 1) { LOGGER__ERROR("Invalid Bayer user features. Expected 1, received {}", src_image_shape.features); @@ -1565,11 +1565,6 @@ hailo_status validate_input_transform_params(hailo_3d_image_shape_t src_image_sh LOGGER__ERROR("Invalid Bayer user features. 
Expected 1, received {}", src_image_shape.features); return HAILO_INVALID_ARGUMENT; } - } else if ((HAILO_FORMAT_ORDER_YUY2 == src_format.order) && - (HAILO_FORMAT_ORDER_YUY2 == dst_format.order)) { - auto shape_size_in_bytes = HailoRTCommon::get_shape_size(src_image_shape) * HailoRTCommon::get_data_bytes(src_format.type); - CHECK(shape_size_in_bytes % HailoRTCommon::HW_DATA_ALIGNMENT == 0, HAILO_INVALID_ARGUMENT, - "YUY2_to_YUY2 Transform shape_size must be aligned to {}", HailoRTCommon::HW_DATA_ALIGNMENT); } return HAILO_SUCCESS; @@ -1654,6 +1649,15 @@ std::string InputTransformContext::description() const transform_description << TransformContextUtils::make_reorder_description(m_src_format.order, m_src_image_shape, m_dst_format.order, m_dst_image_shape); } + if (m_should_pad_periph) { + if (!first) { + transform_description << " | "; + } else { + first = false; + } + transform_description << TransformContextUtils::make_pad_periph_description(m_src_image_shape, m_dst_image_shape); + } + return transform_description.str(); } @@ -1661,7 +1665,7 @@ Expected<std::unique_ptr<InputTransformContext>> InputTransformContext::create(c onst hailo_format_t &src_format, const hailo_3d_image_shape_t &dst_image_shape, const hailo_format_t &dst_format, const std::vector<hailo_quant_info_t> &dst_quant_infos) { - auto status = validate_input_transform_params(src_image_shape, src_format, dst_image_shape, dst_format); + auto status = validate_input_transform_params(src_image_shape, src_format, dst_format); CHECK_SUCCESS_AS_EXPECTED(status); const auto internal_src_format = HailoRTDefaults::expand_auto_format(src_format, dst_format); @@ -2077,6 +2081,15 @@ std::string FrameOutputTransformContext::description() const transform_description << TransformContextUtils::make_reorder_description(m_src_format.order, m_src_image_shape, m_dst_format.order, m_dst_image_shape); } + if (m_should_pad_periph) { + if (!first) { + transform_description << " | "; + } else { + first = false; + } + transform_description << TransformContextUtils::make_pad_periph_description(m_src_image_shape, m_dst_image_shape); + } + return transform_description.str(); } @@ -2192,8 +2205,9 @@ hailo_status fuse_buffers(const std::vector<MemoryView> &buffers, HailoRTCommon::get_nms_hw_frame_size(info)); } - // We keep the size of the dst buffer 1 bbox_size too big to stay in the format of not defused nms frames. - total_size_of_buffers += frames[0].first->bbox_size; + // We keep the dst buffer one burst (bbox_size * burst_size) too big, to stay in the format of non-defused nms frames. + const auto burst_size = (frames[0].first->bbox_size * frames[0].first->burst_size); + total_size_of_buffers += burst_size; CHECK(dst.size() == total_size_of_buffers, HAILO_INVALID_ARGUMENT, "Size of destination buffer is not same as the expected size of the fused frame! (size: {}, expected: {})",
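The fused NMS destination is now sized as the sum of the per-frame hw frame sizes plus one extra burst (bbox_size * burst_size) rather than a single bbox, keeping the fused output in the same layout as a non-defused frame. A hedged sketch of the size computation (helper name illustrative; assumes a non-empty infos vector):

```cpp
// Expected destination size for fusing the given defused NMS frames.
size_t expected_fused_size(const std::vector<hailo_nms_info_t> &infos)
{
    size_t total = 0;
    for (const auto &info : infos) {
        total += HailoRTCommon::get_nms_hw_frame_size(info);
    }
    // one extra burst keeps the layout of a non-defused nms frame
    return total + (infos[0].bbox_size * infos[0].burst_size);
}
```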
diff --git a/hailort/libhailort/src/transform/transform_internal.hpp b/hailort/libhailort/src/transform/transform_internal.hpp index 3f254a62..8cbc115f 100644 --- a/hailort/libhailort/src/transform/transform_internal.hpp +++ b/hailort/libhailort/src/transform/transform_internal.hpp @@ -46,6 +46,7 @@ class HAILORTAPI TransformContextUtils final static std::string make_reorder_description(hailo_format_order_t src_order, hailo_3d_image_shape_t src_shape, hailo_format_order_t dst_order, hailo_3d_image_shape_t dst_shape); static std::string make_transpose_description(hailo_3d_image_shape_t original_shape, hailo_3d_image_shape_t transposed_shape); + static std::string make_pad_periph_description(hailo_3d_image_shape_t src_shape, hailo_3d_image_shape_t dst_shape); template <typename T> static hailo_status transform__d2h_NHCW_to_NCHW( diff --git a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt index 066e16e5..57d45d50 100644 --- a/hailort/libhailort/src/utils/CMakeLists.txt +++ b/hailort/libhailort/src/utils/CMakeLists.txt @@ -8,6 +8,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/soc_utils/partial_cluster_reader.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/measurement_utils.cpp ) if(HAILO_BUILD_PROFILER) diff --git a/hailort/libhailort/src/utils/buffer.cpp b/hailort/libhailort/src/utils/buffer.cpp index 6283fb55..054c24fd 100644 --- a/hailort/libhailort/src/utils/buffer.cpp +++ b/hailort/libhailort/src/utils/buffer.cpp @@ -10,6 +10,8 @@ **/ #include "hailo/buffer.hpp" +#include "utils/buffer_storage.hpp" +#include "utils/exported_resource_manager.hpp" #include "common/logger_macros.hpp" #include "common/utils.hpp" #include "common/string_utils.hpp" @@ -39,20 +41,37 @@ static void format_buffer(std::ostream& stream, const uint8_t *buffer, size_t si } } +class Buffer::StorageImpl final { +public: + StorageImpl(BufferStoragePtr storage, std::unique_ptr<BufferStorageRegisteredResource> storage_resource) : + m_storage(std::move(storage)), + m_storage_resource(std::move(storage_resource)) + {} + + BufferStoragePtr m_storage; + + // Optionally we register the resource. By default the resource is registered to the manager, but in some cases + // (for example, the unit tests) we want to skip the registration. + std::unique_ptr<BufferStorageRegisteredResource> m_storage_resource; +}; + Buffer::Buffer() : - m_storage(), + m_storage_impl(), m_data(nullptr), m_size(0) {} -Buffer::Buffer(BufferStoragePtr storage) : - m_storage(storage), - m_data(static_cast<uint8_t*>(m_storage->user_address())), - m_size(m_storage->size()) +// Defined in the cpp file, since the StorageImpl definition is needed.
+Buffer::~Buffer() = default; + +Buffer::Buffer(std::unique_ptr<StorageImpl> storage) : + m_storage_impl(std::move(storage)), + m_data(static_cast<uint8_t*>(m_storage_impl->m_storage->user_address())), + m_size(m_storage_impl->m_storage->size()) {} Buffer::Buffer(Buffer&& other) : - m_storage(std::move(other.m_storage)), + m_storage_impl(std::move(other.m_storage_impl)), m_data(std::exchange(other.m_data, nullptr)), m_size(std::exchange(other.m_size, 0)) {} @@ -62,7 +81,7 @@ Expected<Buffer> Buffer::create(size_t size, const BufferStorageParams &params) auto storage = BufferStorage::create(size, params); CHECK_EXPECTED(storage); - return Buffer(storage.release()); + return create(storage.release()); } Expected<Buffer> Buffer::create(size_t size, uint8_t default_value, const BufferStorageParams &params) @@ -121,6 +140,24 @@ Expected<Buffer> Buffer::create(std::initializer_list<uint8_t> init, const Buffe return buffer; } +Expected<Buffer> Buffer::create(BufferStoragePtr storage, bool register_storage /* = true */) +{ + // If needed, register the storage + std::unique_ptr<BufferStorageRegisteredResource> optional_registered_resource; + if (register_storage) { + const auto storage_key = std::make_pair(storage->user_address(), storage->size()); + auto registered_resource = BufferStorageRegisteredResource::create(storage, storage_key); + CHECK_EXPECTED(registered_resource); + optional_registered_resource = make_unique_nothrow<BufferStorageRegisteredResource>(registered_resource.release()); + CHECK_NOT_NULL(optional_registered_resource, HAILO_OUT_OF_HOST_MEMORY); + } + + auto storage_impl = make_unique_nothrow<StorageImpl>(std::move(storage), std::move(optional_registered_resource)); + CHECK_NOT_NULL(storage_impl, HAILO_OUT_OF_HOST_MEMORY); + + return Buffer(std::move(storage_impl)); +} + Expected<Buffer> Buffer::copy() const { return Buffer::create(m_data, m_size); @@ -128,7 +165,7 @@ Expected<Buffer> Buffer::copy() const Buffer& Buffer::operator=(Buffer&& other) { - m_storage = std::move(other.m_storage); + m_storage_impl = std::move(other.m_storage_impl); m_data = std::exchange(other.m_data, nullptr); m_size = std::exchange(other.m_size, 0); return *this; @@ -174,7 +211,8 @@ Buffer::iterator Buffer::end() BufferStorage &Buffer::storage() { - return *m_storage; + assert(m_storage_impl); + return *m_storage_impl->m_storage; } uint8_t* Buffer::data() noexcept @@ -241,6 +279,11 @@ uint64_t& Buffer::as_uint64() return as_type<uint64_t>(); } +Expected<void *> Buffer::release() noexcept +{ + return m_storage_impl->m_storage->release(); +} + MemoryView::MemoryView() : m_data(nullptr), m_size(0)
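Buffer::create now registers the storage with BufferStorageResourceManager under an (address, size) key, which is what lets TransferBuffer::map_buffer recover the dma-able buffer behind a plain MemoryView later on. A hedged sketch of the lookup side (mirrors the code in transfer_common.cpp above; user_buffer is an assumed MemoryView):

```cpp
// Look up a registered storage by the user-visible address/size pair.
const auto key = std::make_pair(user_buffer.data(), user_buffer.size());
if (auto storage = hailort::BufferStorageResourceManager::get_resource(key)) {
    // Hit: reuse the existing dma-able buffer instead of re-pinning user memory.
    auto dma_able = storage->get()->get_dma_able_buffer();
} else {
    // Miss: the memory did not come from a registered Buffer; pin it directly.
}
```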
diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp index fbdde3d3..877ab7a6 100644 --- a/hailort/libhailort/src/utils/buffer_storage.cpp +++ b/hailort/libhailort/src/utils/buffer_storage.cpp @@ -7,7 +7,7 @@ * @brief TODO: fill me (HRT-10026) **/ -#include "hailo/buffer_storage.hpp" +#include "buffer_storage.hpp" #include "hailo/hailort.h" #include "hailo/vdevice.hpp" #include "vdma/vdma_device.hpp" @@ -26,88 +26,16 @@ static_assert(HAILO_DMA_BUFFER_DIRECTION_D2H == (int)HailoRTDriver::DmaDirection static_assert(HAILO_DMA_BUFFER_DIRECTION_BOTH == (int)HailoRTDriver::DmaDirection::BOTH, "hailo_dma_buffer_direction_t must match HailoRTDriver::DmaDirection"); -BufferStorageParams::HeapParams::HeapParams() -{} - -Expected<BufferStorageParams::DmaMappingParams> BufferStorageParams::DmaMappingParams::create( - const hailo_buffer_dma_mapping_params_t &params) -{ - CHECK_AS_EXPECTED((params.device == nullptr) || (params.vdevice == nullptr), HAILO_INVALID_ARGUMENT, - "Can't set both device and vdevice fields"); - return DmaMappingParams(params); -} -BufferStorageParams::DmaMappingParams::DmaMappingParams(const hailo_buffer_dma_mapping_params_t &params) : - device(reinterpret_cast<Device*>(params.device)), - vdevice(reinterpret_cast<VDevice*>(params.vdevice)), - data_direction(params.direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams(Device &device, hailo_dma_buffer_direction_t data_direction) : - device(&device), - vdevice(nullptr), - data_direction(data_direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) : - device(nullptr), - vdevice(&vdevice), - data_direction(data_direction) -{} - -BufferStorageParams::DmaMappingParams::DmaMappingParams() : - device(nullptr), - vdevice(nullptr), - data_direction(HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM) -{} - -Expected<BufferStorageParams> BufferStorageParams::create(const hailo_buffer_parameters_t &params) -{ - BufferStorageParams result{}; - result.flags = params.flags; - - if (params.flags == HAILO_BUFFER_FLAGS_NONE) { - result.heap_params = HeapParams(); - } else if ((params.flags & HAILO_BUFFER_FLAGS_DMA) != 0) { - auto dma_mapping_params = DmaMappingParams::create(params.dma_mapping_params); - CHECK_EXPECTED(dma_mapping_params); - result.dma_mapping_params = dma_mapping_params.release(); - } else { - // TODO: HRT-10903 - LOGGER__ERROR("Buffer storage flags not currently supported {}", params.flags); - return make_unexpected(HAILO_NOT_IMPLEMENTED); - } - - return result; -} BufferStorageParams BufferStorageParams::create_dma() { BufferStorageParams result{}; result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(); - return result; -} - -BufferStorageParams BufferStorageParams::create_dma(Device &device, hailo_dma_buffer_direction_t data_direction) : -{ - BufferStorageParams result{}; - result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(device, data_direction); - return result; -} - -BufferStorageParams BufferStorageParams::create_dma(VDevice &vdevice, hailo_dma_buffer_direction_t data_direction) -{ - BufferStorageParams result{}; - result.flags = HAILO_BUFFER_FLAGS_DMA; - result.dma_mapping_params = DmaMappingParams(vdevice, data_direction); return result; } BufferStorageParams::BufferStorageParams() : - flags(HAILO_BUFFER_FLAGS_NONE), - heap_params() + flags(HAILO_BUFFER_FLAGS_NONE) {} Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorageParams &params) @@ -117,29 +45,9 @@ Expected<BufferStoragePtr> BufferStorage::create(size_t size, const BufferStorag CHECK_EXPECTED(result); return std::static_pointer_cast<BufferStorage>(result.release()); } else if (0 != (params.flags & HAILO_BUFFER_FLAGS_DMA)) { - // TODO: check other flags here (HRT-10903) - auto &dma_mapping_params = params.dma_mapping_params; - - DmaStoragePtr storage = nullptr; - if ((dma_mapping_params.device != nullptr) && (dma_mapping_params.vdevice != nullptr)) { - LOGGER__ERROR("Can't map a buffer to both vdevice and device"); - return make_unexpected(HAILO_INVALID_ARGUMENT); - } else if (dma_mapping_params.device != nullptr) { - auto result = DmaStorage::create(size, dma_mapping_params.data_direction, - *dma_mapping_params.device); - CHECK_EXPECTED(result); - storage = result.release(); - } else if (dma_mapping_params.vdevice != nullptr) { - auto result = DmaStorage::create(size, dma_mapping_params.data_direction, - *dma_mapping_params.vdevice); - CHECK_EXPECTED(result); - storage = result.release(); - } else { - auto result = DmaStorage::create(size); - CHECK_EXPECTED(result); - storage = result.release(); - } - return
std::static_pointer_cast(storage); + auto result = DmaStorage::create(size); + CHECK_EXPECTED(result); + return std::static_pointer_cast(result.release()); } // TODO: HRT-10903 @@ -147,13 +55,9 @@ Expected BufferStorage::create(size_t size, const BufferStorag return make_unexpected(HAILO_NOT_IMPLEMENTED); } -BufferStorage::BufferStorage(Type type) : - m_type(type) -{} - -BufferStorage::Type BufferStorage::type() const +Expected BufferStorage::get_dma_able_buffer() { - return m_type; + return make_unexpected(HAILO_NOT_IMPLEMENTED); } Expected HeapStorage::create(size_t size) @@ -168,7 +72,6 @@ Expected HeapStorage::create(size_t size) } HeapStorage::HeapStorage(std::unique_ptr data, size_t size) : - BufferStorage(Type::HEAP), m_data(std::move(data)), m_size(size) {} @@ -195,127 +98,21 @@ Expected HeapStorage::release() noexcept return m_data.release(); } -Expected HeapStorage::dma_map(Device &, hailo_dma_buffer_direction_t) -{ - LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); - return make_unexpected(HAILO_INVALID_OPERATION); -} - -Expected HeapStorage::dma_map(VdmaDevice &, hailo_dma_buffer_direction_t) -{ - LOGGER__ERROR("Heap allocated buffers can't be mapped to DMA"); - return make_unexpected(HAILO_INVALID_OPERATION); -} - -Expected HeapStorage::get_dma_mapped_buffer(const std::string &) -{ - LOGGER__ERROR("Mapped buffer is not supported for Heap allocated buffers"); - return make_unexpected(HAILO_INVALID_OPERATION); -} Expected DmaStorage::create(size_t size) { - static const auto ALLOCATE_BUFFER = nullptr; - return create(ALLOCATE_BUFFER, size); -} - -Expected DmaStorage::create(size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device) -{ - static const auto ALLOCATE_BUFFER = nullptr; - return create(ALLOCATE_BUFFER, size, data_direction, - std::vector>{std::ref(device)}); -} - -Expected DmaStorage::create(size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice) -{ - static const auto ALLOCATE_BUFFER = nullptr; - auto physical_devices = vdevice.get_physical_devices(); - CHECK_EXPECTED(physical_devices); - return create(ALLOCATE_BUFFER, size, data_direction, physical_devices.release()); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size) -{ - return create(user_address, size); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, Device &device) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); - return create(user_address, size, data_direction, - std::vector>{std::ref(device)}); -} - -Expected DmaStorage::create_from_user_address(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, VDevice &vdevice) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(user_address); - auto physical_devices = vdevice.get_physical_devices(); - CHECK_EXPECTED(physical_devices); - return create(user_address, size, data_direction, physical_devices.release()); -} - -Expected> DmaStorage::create_dma_able_buffer_from_user_size(void *addr, size_t size) -{ - auto storage = create_from_user_address(addr, size); - CHECK_EXPECTED(storage); + // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. 
+ TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(size)); - auto buffer = make_shared_nothrow(storage.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); - - return buffer; -} - -Expected DmaStorage::create(void *user_address, size_t size, - hailo_dma_buffer_direction_t data_direction, - std::vector> &&physical_devices) -{ - vdma::DmaAbleBufferPtr dma_able_buffer_ptr = nullptr; - if (nullptr == user_address) { - // TODO: HRT-10283 support sharing low memory buffers for DART and similar systems. - auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size); - CHECK_EXPECTED(dma_able_buffer); - dma_able_buffer_ptr = dma_able_buffer.release(); - } else { - auto dma_able_buffer = vdma::DmaAbleBuffer::create_from_user_address(user_address, size); - CHECK_EXPECTED(dma_able_buffer); - dma_able_buffer_ptr = dma_able_buffer.release(); - } - - auto result = make_shared_nothrow(std::move(dma_able_buffer_ptr)); + auto result = make_shared_nothrow(std::move(dma_able_buffer)); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - for (auto &device : physical_devices) { - auto is_new_mapping = result->dma_map(device, data_direction); - CHECK_EXPECTED(is_new_mapping); - CHECK_AS_EXPECTED(is_new_mapping.value(), HAILO_INTERNAL_FAILURE); - } - return result; } DmaStorage::DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer) : - BufferStorage(Type::DMA), - m_dma_able_buffer(std::move(dma_able_buffer)), - m_mappings() + m_dma_able_buffer(std::move(dma_able_buffer)) {} -DmaStorage::~DmaStorage() -{ - // TODO: deleter callback holds a reference to a device, which is bad since this BufferStorage could outlive - // the device. We need to doc that it isn't allowed. Later on, I think devices should use shared_ptrs - // and then the mapping will inc the reference count (HRT-12361) - for (const auto &device_mapping_pair : m_mappings) { - const auto &mapping = device_mapping_pair.second; - if (nullptr != mapping.second) { - mapping.second(); - } - } -} - size_t DmaStorage::size() const { return m_dma_able_buffer->size(); @@ -331,115 +128,9 @@ Expected DmaStorage::release() noexcept return make_unexpected(HAILO_NOT_IMPLEMENTED); } -Expected DmaStorage::dma_map(Device &device, hailo_dma_buffer_direction_t data_direction) -{ - const auto device_type = device.get_type(); - CHECK_AS_EXPECTED(((Device::Type::INTEGRATED == device_type) || (Device::Type::PCIE == device_type)), - HAILO_INVALID_ARGUMENT, "Invalid device type (expected integrated/pcie, received {})", device_type); - return dma_map(*reinterpret_cast(&device), data_direction); -} - -// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) -Expected DmaStorage::dma_map(VdmaDevice &device, hailo_dma_buffer_direction_t data_direction) -{ - CHECK_AS_EXPECTED(data_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH, HAILO_INVALID_ARGUMENT, - "Invalid data direction {}", data_direction); - - const auto device_id = device.get_dev_id(); - auto find_result = m_mappings.find(device_id); - if (find_result != m_mappings.end()) { - // The buffer has been mapped in this object => don't map it again - return Expected(false); // not a new mapping - } - - const auto direction = (data_direction == HAILO_DMA_BUFFER_DIRECTION_H2D) ? 
HAILO_H2D_STREAM : HAILO_D2H_STREAM; - - auto mapping_result = device.try_dma_map(m_dma_able_buffer, direction); - CHECK_EXPECTED(mapping_result); - - const auto is_new_mapping = mapping_result->second; - if (is_new_mapping) { - const auto deleter = [&device, address = m_dma_able_buffer->user_address(), direction]() { - // Best effort - auto status = device.dma_unmap(address, direction); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to un-map buffer {} from device {} in direction {}", - address, device.get_dev_id(), direction); - } - }; - m_mappings.emplace(device_id, std::make_pair(mapping_result->first, deleter)); - } else { - m_mappings.emplace(device_id, std::make_pair(mapping_result->first, nullptr)); - } - return Expected(is_new_mapping); -} - -Expected DmaStorage::get_dma_mapped_buffer(const std::string &device_id) -{ - auto mapped_buffer = m_mappings.find(device_id); - if (mapped_buffer == m_mappings.end()) { - // Don't print error message here - LOGGER__INFO("Mapped buffer for {} not found", device_id); - return make_unexpected(HAILO_NOT_FOUND); - } - - return Expected(mapped_buffer->second.first); -} - -Expected UserBufferStorage::create(void *user_address, const size_t size) -{ - auto result = make_shared_nothrow(user_address, size); - CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); - - return result; -} - -UserBufferStorage::UserBufferStorage(void * user_address, const size_t size) : - BufferStorage(Type::USER_BUFFER), - m_user_address(user_address), - m_size(size) -{} - -size_t UserBufferStorage::size() const -{ - return m_size; -} - -void *UserBufferStorage::user_address() -{ - return const_cast(m_user_address); -} - -Expected UserBufferStorage::release() noexcept -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected UserBufferStorage::dma_map(Device &/* device */, hailo_dma_buffer_direction_t /* data_direction */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -// TODO: change data_direction to hailo_stream_direction_t (HRT-12391) -Expected UserBufferStorage::dma_map(VdmaDevice &/* device */, hailo_dma_buffer_direction_t /* data_direction */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected UserBufferStorage::get_dma_mapped_buffer(const std::string &/* device_id */) -{ - return make_unexpected(HAILO_NOT_IMPLEMENTED); -} - -Expected> UserBufferStorage::create_storage_from_user_buffer(void *addr, size_t size) +Expected DmaStorage::get_dma_able_buffer() { - auto storage = UserBufferStorage::create(addr, size); - CHECK_EXPECTED(storage); - - auto buffer = make_shared_nothrow(storage.release()); - CHECK_NOT_NULL_AS_EXPECTED(buffer, HAILO_OUT_OF_HOST_MEMORY); - - return buffer; + return vdma::DmaAbleBufferPtr{m_dma_able_buffer}; } } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/buffer_storage.hpp b/hailort/libhailort/src/utils/buffer_storage.hpp new file mode 100644 index 00000000..b277c983 --- /dev/null +++ b/hailort/libhailort/src/utils/buffer_storage.hpp @@ -0,0 +1,149 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file buffer_storage.hpp + * @brief Contains the internal storage object for the Buffer object. 
+ **/ + +#ifndef _HAILO_BUFFER_STORAGE_HPP_ +#define _HAILO_BUFFER_STORAGE_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "hailo/buffer.hpp" + +#include "utils/exported_resource_manager.hpp" + +#include +#include +#include +#include +#include +#include + + +/** hailort namespace */ +namespace hailort +{ + +// Forward declarations +class Device; +class VDevice; +class VdmaDevice; +class BufferStorage; +class HeapStorage; +class DmaStorage; +class HailoRTDriver; +class Buffer; + +namespace vdma { + class DmaAbleBuffer; + using DmaAbleBufferPtr = std::shared_ptr<DmaAbleBuffer>; + + class MappedBuffer; + using MappedBufferPtr = std::shared_ptr<MappedBuffer>; +} + + +using BufferStoragePtr = std::shared_ptr<BufferStorage>; + +// The key is a (void *, size) pair. std::pair is not hashable by default, so BufferStorageKeyHash below provides the hash. +using BufferStorageKey = std::pair<void *, size_t>; + +struct BufferStorageKeyHash { + size_t operator()(const BufferStorageKey &key) const noexcept + { + return std::hash<void *>()(key.first) ^ std::hash<size_t>()(key.second); + } +}; + +using BufferStorageResourceManager = ExportedResourceManager<BufferStoragePtr, BufferStorageKey, BufferStorageKeyHash>; +using BufferStorageRegisteredResource = RegisteredResource<BufferStoragePtr, BufferStorageKey, BufferStorageKeyHash>; + +class BufferStorage +{ +public: + + static Expected<BufferStoragePtr> create(size_t size, const BufferStorageParams &params); + + BufferStorage(BufferStorage&& other) noexcept = default; + BufferStorage(const BufferStorage &) = delete; + BufferStorage &operator=(BufferStorage &&) = delete; + BufferStorage &operator=(const BufferStorage &) = delete; + virtual ~BufferStorage() = default; + + virtual size_t size() const = 0; + virtual void *user_address() = 0; + // Returns the pointer managed by this object and releases ownership + // TODO: Add a free function pointer? (HRT-10024) + // // Free the returned pointer with `delete` + // TODO: after release the containing buffer will hold pointers to values that were released. + // Document that this can happen? Disable this behavior somehow? (HRT-10024) + virtual Expected<void *> release() noexcept = 0; + + // Internal functions + virtual Expected<vdma::DmaAbleBufferPtr> get_dma_able_buffer(); + + BufferStorage() = default; +}; + +using HeapStoragePtr = std::shared_ptr<HeapStorage>; + +/** + * Most basic storage for buffer - regular heap allocation. + */ +class HeapStorage : public BufferStorage +{ +public: + static Expected<HeapStoragePtr> create(size_t size); + HeapStorage(std::unique_ptr<uint8_t[]> data, size_t size); + HeapStorage(HeapStorage&& other) noexcept; + HeapStorage(const HeapStorage &) = delete; + HeapStorage &operator=(HeapStorage &&) = delete; + HeapStorage &operator=(const HeapStorage &) = delete; + virtual ~HeapStorage() = default; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected<void *> release() noexcept override; + +private: + std::unique_ptr<uint8_t[]> m_data; + size_t m_size; +}; + +using DmaStoragePtr = std::shared_ptr<DmaStorage>; + +/** + * Storage class for buffer that can be directly mapped to a device/vdevice for dma. + */ +class DmaStorage : public BufferStorage +{ +public: + // Creates a DmaStorage instance holding a dma-able buffer size bytes large.
+    static Expected<DmaStoragePtr> create(size_t size);
+
+    DmaStorage(const DmaStorage &other) = delete;
+    DmaStorage &operator=(const DmaStorage &other) = delete;
+    DmaStorage(DmaStorage &&other) noexcept = default;
+    DmaStorage &operator=(DmaStorage &&other) = delete;
+    virtual ~DmaStorage() = default;
+
+    virtual size_t size() const override;
+    virtual void *user_address() override;
+    virtual Expected<void *> release() noexcept override;
+
+    // Internal functions
+    DmaStorage(vdma::DmaAbleBufferPtr &&dma_able_buffer);
+    virtual Expected<vdma::DmaAbleBufferPtr> get_dma_able_buffer() override;
+
+private:
+    vdma::DmaAbleBufferPtr m_dma_able_buffer;
+};
+
+
+} /* namespace hailort */
+
+#endif /* _HAILO_BUFFER_STORAGE_HPP_ */
diff --git a/hailort/libhailort/src/utils/dma_buffer_utils.hpp b/hailort/libhailort/src/utils/dma_buffer_utils.hpp
new file mode 100644
index 00000000..7ed80192
--- /dev/null
+++ b/hailort/libhailort/src/utils/dma_buffer_utils.hpp
@@ -0,0 +1,37 @@
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file dma_buffer_utils.hpp
+ * @brief A module for managing DMA buffers
+ **/
+
+#ifndef _HAILO_DMA_BUFFER_UTILS_HPP_
+#define _HAILO_DMA_BUFFER_UTILS_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+#include "utils/buffer_storage.hpp"
+
+/** hailort namespace */
+namespace hailort
+{
+
+class HAILORTAPI DmaBufferUtils
+{
+public:
+
+    static Expected<MemoryView> mmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer);
+
+    static hailo_status munmap_dma_buffer_write(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview);
+
+    static Expected<MemoryView> mmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer);
+
+    static hailo_status munmap_dma_buffer_read(hailo_dma_buffer_t dma_buffer, MemoryView dma_buffer_memview);
+
+};
+
+} /* namespace hailort */
+
+#endif /* _HAILO_DMA_BUFFER_UTILS_HPP_ */
diff --git a/hailort/libhailort/src/utils/exported_resource_manager.hpp b/hailort/libhailort/src/utils/exported_resource_manager.hpp
index a4d2d5df..5a59e241 100644
--- a/hailort/libhailort/src/utils/exported_resource_manager.hpp
+++ b/hailort/libhailort/src/utils/exported_resource_manager.hpp
@@ -11,8 +11,11 @@
 #define _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_

 #include "hailo/hailort.h"
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"

 #include
+#include

 namespace hailort
 {
@@ -89,6 +92,62 @@ class ExportedResourceManager final
     std::unordered_map m_storage;
 };

+template <typename Resource, typename Key, typename KeyHash = std::hash<Key>>
+class RegisteredResource final {
+public:
+    using Manager = ExportedResourceManager<Resource, Key, KeyHash>;
+
+    static Expected<RegisteredResource> create(const Resource &resource, const Key &key)
+    {
+        hailo_status status = HAILO_UNINITIALIZED;
+        RegisteredResource registered_resource(resource, key, status);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__TRACE("Resource registration failed with status {}", status);
+            return make_unexpected(status);
+        }
+        return registered_resource;
+    }
+
+    RegisteredResource(const Resource &resource, const Key &key, hailo_status &status) :
+        m_key(key)
+    {
+        status = Manager::register_resource(resource, key);
+        if (HAILO_SUCCESS != status) {
+            return;
+        }
+        m_should_release = true;
+        status = HAILO_SUCCESS;
+    }
+
+    ~RegisteredResource()
+    {
+        if (m_should_release) {
+            Manager::unregister_resource(m_key);
+        }
+    }
+
+    RegisteredResource(const RegisteredResource &) = delete;
+    RegisteredResource& operator=(const RegisteredResource &) = delete;
+
+    RegisteredResource(RegisteredResource &&other) :
+        m_key(other.m_key),
m_should_release(std::exchange(other.m_should_release, false)) + {} + + RegisteredResource& operator=(RegisteredResource &&other) + { + if (this != &other) { + m_key = other.m_key; + m_should_release = std::exchange(other.m_should_release, false); + } + return *this; + } + +private: + Key m_key; + bool m_should_release = false; +}; + } /* namespace hailort */ #endif /* _HAILO_EXPORTED_RESOURCE_MANAGER_HPP_ */ diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp index d908e871..ddca83a3 100644 --- a/hailort/libhailort/src/utils/hailort_common.cpp +++ b/hailort/libhailort/src/utils/hailort_common.cpp @@ -15,7 +15,6 @@ namespace hailort // Needed for the linker const uint32_t HailoRTCommon::BBOX_PARAMS; -const uint32_t HailoRTCommon::MASK_PARAMS; const uint32_t HailoRTCommon::MAX_DEFUSED_LAYER_COUNT; const size_t HailoRTCommon::HW_DATA_ALIGNMENT; const uint32_t HailoRTCommon::MAX_NMS_BURST_SIZE; @@ -49,13 +48,13 @@ Expected> HailoRTCommon::to_device_ids_vector(con uint32_t HailoRTCommon::get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format) { - auto shape_size = 0; + double frame_size = 0; if (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == format.order) { - shape_size = get_nms_with_byte_mask_host_shape_size(nms_shape, format); + frame_size = get_nms_with_byte_mask_host_frame_size(nms_shape); } else { - shape_size = get_nms_host_shape_size(nms_shape); + auto shape_size = get_nms_host_shape_size(nms_shape); + frame_size = shape_size * get_format_data_bytes(format); } - double frame_size = shape_size * get_format_data_bytes(format); if (frame_size < UINT32_MAX) { return static_cast(frame_size); } else{ @@ -64,4 +63,49 @@ uint32_t HailoRTCommon::get_nms_host_frame_size(const hailo_nms_shape_t &nms_sha } } +Expected HailoRTCommon::as_hailo_pix_buffer(MemoryView &memory_view, hailo_format_order_t order) +{ + switch(order){ + case HAILO_FORMAT_ORDER_NV12: + case HAILO_FORMAT_ORDER_NV21: { + CHECK_AS_EXPECTED(0 == (memory_view.size() % 3), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 3"); + auto y_plane_size = memory_view.size() * 2 / 3; + auto uv_plane_size = memory_view.size() * 1 / 3; + + auto uv_data_ptr = reinterpret_cast(memory_view.data()) + y_plane_size; + + hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), {memory_view.data()}}; + hailo_pix_buffer_plane_t uv {uint32_t(uv_plane_size), uint32_t(uv_plane_size), {uv_data_ptr}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {y, uv}, NUMBER_OF_PLANES_NV12_NV21, HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + + return buffer; + } + case HAILO_FORMAT_ORDER_I420: { + CHECK_AS_EXPECTED(0 == (memory_view.size() % 6), HAILO_INVALID_ARGUMENT, "buffer size must be divisible by 6"); + + auto y_plane_size = memory_view.size() * 2 / 3; + auto u_plane_size = memory_view.size() * 1 / 6; + auto v_plane_size = memory_view.size() * 1 / 6; + + auto u_data_ptr = (char*)memory_view.data() + y_plane_size; + auto v_data_ptr = u_data_ptr + u_plane_size; + + hailo_pix_buffer_plane_t y {uint32_t(y_plane_size), uint32_t(y_plane_size), {memory_view.data()}}; + hailo_pix_buffer_plane_t u {uint32_t(u_plane_size), uint32_t(u_plane_size), {u_data_ptr}}; + hailo_pix_buffer_plane_t v {uint32_t(v_plane_size), uint32_t(v_plane_size), {v_data_ptr}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {y, u, v}, NUMBER_OF_PLANES_I420, 
HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + + return buffer; + } + default: { + hailo_pix_buffer_plane_t plane = {(uint32_t)memory_view.size(), (uint32_t)memory_view.size(), {memory_view.data()}}; + // Currently only support HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR + hailo_pix_buffer_t buffer{0, {plane}, 1, HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR}; + return buffer; + } + } +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/measurement_utils.cpp b/hailort/libhailort/src/utils/measurement_utils.cpp new file mode 100644 index 00000000..b958eaaa --- /dev/null +++ b/hailort/libhailort/src/utils/measurement_utils.cpp @@ -0,0 +1,212 @@ +/** + * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file measurement_utils.cpp + * @brief Measurement utils module implementation + **/ + +#include "hailo/hailort.h" +#include "measurement_utils.hpp" + + +namespace hailort { +namespace utils { + +hailo_status MeasurementStorage::add_measurement(const std::string &accumulator_name, MeasurementType type, + double measurement) +{ + return get_instance().add_measurement_impl(accumulator_name, type, measurement); +} + +Expected MeasurementStorage::get_measurements(MeasurementType type, const std::string &accumulator_name) +{ + return get_instance().get_measurements_impl(type, accumulator_name); +} + +void MeasurementStorage::set_verbosity(bool verbosity) +{ + return get_instance().set_verbosity_impl(verbosity); +} + +void MeasurementStorage::set_precision(uint32_t precision) +{ + return get_instance().set_precision_impl(precision); +} + +void MeasurementStorage::clear() +{ + return get_instance().clear_impl(); +} + +void MeasurementStorage::show_output_on_destruction(bool show_output) +{ + return get_instance().show_output_on_destruction_impl(show_output); +} + +MeasurementStorage::~MeasurementStorage() +{ + if (!m_show_output_on_destruction) { + return; + } + + // Since MeasurementStorage has only one static instance, the following will be printed on program shutdown + std::cout << "**** MEASUREMENT UTIL RESULTS ****\n"; + format_measurements(std::cout, MeasurementType::TIME); + format_measurements(std::cout, MeasurementType::FPS); + format_measurements(std::cout, MeasurementType::VALUE); +} + +MeasurementStorage& MeasurementStorage::get_instance() +{ + static MeasurementStorage instance; + return instance; +} + +std::string MeasurementStorage::indent_string(const std::string &str, uint8_t indent_level) +{ + static const std::string INDENT = " "; + + std::stringstream stream; + for (auto i = 0; i < indent_level; i++) { + stream << INDENT; + } + + stream << str; + return stream.str(); +} + +MeasurementStorage::AccumulatorMap &MeasurementStorage::get_storage(MeasurementType type) +{ + switch (type) + { + case MeasurementType::TIME: + return m_time_acc_storage; + case MeasurementType::FPS: + return m_fps_acc_storage; + case MeasurementType::VALUE: + return m_value_acc_storage; + default: + // We should never get here, we'll return the time storage to avoid a crash + LOGGER__ERROR("Invalid measurement type"); + return m_time_acc_storage; + } +} + +std::vector> MeasurementStorage::get_sorted_elements(MeasurementType type) +{ + // Storage is unordered in order to be as fast as possible in add_measurement + // We now copy the elements to a vector and sort in order to get the most readable results + // Note that we return a snapshot of the storage elements, and after this function returns the storage may 
change + std::vector> sorted_accumulator_name_pairs; + { + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + sorted_accumulator_name_pairs.reserve(storage.map.size()); + sorted_accumulator_name_pairs.insert(sorted_accumulator_name_pairs.end(), storage.map.cbegin(), storage.map.cend()); + } + std::sort(sorted_accumulator_name_pairs.begin(), sorted_accumulator_name_pairs.end()); + + return sorted_accumulator_name_pairs; +} + +std::string MeasurementStorage::get_measurement_title(MeasurementType type) +{ + switch (type) + { + case MeasurementType::TIME: + return "Time measurements (ms)"; + case MeasurementType::FPS: + return "FPS measurements"; + case MeasurementType::VALUE: + return "Value measurements"; + default: + // We should never get here + LOGGER__ERROR("Invalid measurement type"); + return "Invalid measurement type"; + } +} + +void MeasurementStorage::format_measurements(std::ostream &output_stream, MeasurementType type) +{ + static const std::string LIST_MARKER = "- "; + + const auto sorted_elements = get_sorted_elements(type); + + output_stream << indent_string(LIST_MARKER, 1) + << get_measurement_title(type) << ": "; + if (sorted_elements.empty()) { + output_stream << "No measurements"; + } + output_stream << "\n"; + + for (const auto &accumulator_name_pair : sorted_elements) { + const auto &accumulator_name = accumulator_name_pair.first; + const auto &accumulator_results = accumulator_name_pair.second->get(); + output_stream << indent_string(LIST_MARKER, 2) << accumulator_name << ": " + << AccumulatorResultsHelper::format_results(accumulator_results, m_verbose, m_precision) << "\n"; + } +} + +hailo_status MeasurementStorage::add_measurement_impl(const std::string &accumulator_name, MeasurementType type, + double measurement) +{ + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + auto it = storage.map.find(accumulator_name); + if (it == storage.map.end()) { + AccumulatorPtr accumulator = nullptr; + if (MeasurementType::FPS == type) { + accumulator = make_shared_nothrow>(accumulator_name); + } else { + accumulator = make_shared_nothrow>(accumulator_name); + } + CHECK_NOT_NULL(accumulator, HAILO_OUT_OF_HOST_MEMORY); + storage.map[accumulator_name] = accumulator; + } + + storage.map[accumulator_name]->add_data_point(measurement); + return HAILO_SUCCESS; +} + +Expected MeasurementStorage::get_measurements_impl(MeasurementType type, const std::string &accumulator_name) +{ + auto &storage = get_storage(type); + std::lock_guard lock_guard(storage.mutex); + + auto it = storage.map.find(accumulator_name); + CHECK(it != storage.map.end(), HAILO_NOT_FOUND); + + return it->second->get(); +} + +void MeasurementStorage::set_verbosity_impl(bool verbosity) +{ + m_verbose = verbosity; +} + +void MeasurementStorage::set_precision_impl(uint32_t precision) +{ + m_precision = precision; +} + +void MeasurementStorage::clear_impl() +{ + // Note: After a certain storage is cleared, it could be filled again with new measurements + // We lock to avoid race conditions for a given map, not to make this function "atomic" + for (auto &storage : {&m_time_acc_storage, &m_fps_acc_storage, &m_value_acc_storage}) { + std::lock_guard lock_guard(storage->mutex); + storage->map.clear(); + } +} + +void MeasurementStorage::show_output_on_destruction_impl(bool show_output) +{ + m_show_output_on_destruction = show_output; +} + +} /* namespace utils */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/measurement_utils.hpp 
b/hailort/libhailort/src/utils/measurement_utils.hpp
new file mode 100644
index 00000000..ae9e3fbf
--- /dev/null
+++ b/hailort/libhailort/src/utils/measurement_utils.hpp
@@ -0,0 +1,174 @@
+/**
+ * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file measurement_utils.hpp
+ * @brief This module provides utility classes for measuring and storing runtime statistics of designated code
+ * blocks/functions.
+ * Three classes are provided for measurements:
+ * 1) utils::MeasureTime - measures the execution time of the scope in which it's declared
+ * 2) utils::MeasureFps - measures the fps of the scope in which it's declared
+ * 3) utils::MeasureValue - measures a numeric value
+ *
+ * Usage:
+ * 1) To measure the running time of a certain function, declare an instance of utils::MeasureTime at the start
+ * of the function. E.g.
+ * 1  hailo_status BoundaryChannel::inc_num_available(uint16_t value)
+ * 2  {
+ * 3      utils::MeasureTime time("inc_num_available on channel_id={}", m_channel_id.channel_index);
+ * 4      // ...
+ * 5      return m_host_registers.set_num_available(static_cast<uint16_t>(num_available));
+ * 6  }
+ * The MEASURE_TIME macro can be used to simplify the declaration of MeasureTime instances. E.g.
+ * Replace line 3 in the above example with:
+ * MEASURE_TIME("inc_num_available on channel_id={}", m_channel_id.channel_index);
+ * 2) To measure the FPS of a certain function use utils::MeasureFps or the MEASURE_FPS macro.
+ * The usage is the same as utils::MeasureTime/MEASURE_TIME.
+ * 3) In some cases we'll want to measure only the performance-critical section of the function. In this case,
+ * open a new scope surrounding this section, and declare an instance of MeasureTime at the start of it. E.g.
+ * 1  hailo_status BoundaryChannel::prepare_descriptors(..., MappedBufferPtr mapped_buffer, ...)
+ * 2  {
+ * 3      if (mapped_buffer != nullptr) {
+ * 4          // Code that we don't want to measure...
+ * 5          if (!is_buffer_already_configured(mapped_buffer, buffer_offset_in_descs, starting_desc)) {
+ * 6              // More code that we don't want to measure...
+ * 7              {
+ * 8                  // We wrapped configure_to_use_buffer with a new scope, because we only want to measure it
+ * 9                  // (originally it wasn't in its own scope)
+ * 10                 utils::MeasureTime time("configure_to_use_buffer on channel_id={}", m_channel_id.channel_index);
+ * 11                 auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, configure_starting_desc);
+ * 12                 CHECK_SUCCESS(status);
+ * 13             }
+ * 14         }
+ * 15     }
+ * 16     // More code...
+ * 17     return HAILO_SUCCESS;
+ * 18 }
+ * Again, the MEASURE_TIME macro can be used in place of the MeasureTime declaration.
+ * 4) To measure the FPS of a certain section use utils::MeasureFps or MEASURE_FPS.
+ * The usage is the same as utils::MeasureTime/MEASURE_TIME.
+ * 5) To measure a numeric value, use the MEASURE_VALUE macro. E.g.
+ * 1  hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t &core_op_handle, const device_id_t &device_id)
+ * 2  {
+ * 3      // ...
+ * 4      auto hw_batch_size = scheduled_core_op->use_dynamic_batch_flow() ? frames_count : SINGLE_CONTEXT_BATCH_SIZE;
+ * 5      MEASURE_VALUE(hw_batch_size, "core_op_handle={}", core_op_handle);
+ * 6      // ...
+ * 7  }
+ * The MEASURE_VALUE macro simplifies the declaration of MeasureValue instances (the class that implements
+ * the measurement logic), and its usage is preferred. The macro will use the stringified variable name as
+ * the prefix for the accumulator name.
+ * E.g. for core_op_handle=0 the accumulator name will be "hw_batch_size (core_op_handle=0)".
+ * 6) Be sure to provide a descriptive name for each measurement. In the above examples, channel_id was used in
+ * order to differentiate between set_num_available/configure_to_use_buffer on different channels.
+ * 7) At the end of the program's execution, the measurements will be printed to stdout. For example, given the
+ * measurements registered in the examples provided for MeasureTime, the following will be printed upon
+ * hailortcli's completion:
+ * $ hailortcli run2 -m raw_async set-net shortcut_net_1080_1920_3.hef
+ * [===================>] 100% 00:00:00
+ * shortcut_net: fps: 255.72
+ * **** MEASUREMENT UTIL RESULTS ****
+ * - Time measurements (ms):
+ *   - configure_to_use_buffer on channel_id=1: count=1285, mean=0.2604
+ *   - configure_to_use_buffer on channel_id=16: count=1285, mean=0.2583
+ *   - inc_num_available on channel_id=1: count=1285, mean=0.0030
+ *   - inc_num_available on channel_id=16: count=1285, mean=0.0017
+ * - FPS measurements: No measurements
+ * - Value measurements: No measurements
+ *
+ * Important note!
+ * The module is intended for debugging performance bottlenecks. For "release-grade" performance
+ * monitoring use other classes provided in the library. For example, see references to AccumulatorPtr
+ * in the core_op modules or DurationCollector in the pipeline modules.
+ **/
+
+#ifndef _HAILO_MEASUREMENT_UTILS_HPP_
+#define _HAILO_MEASUREMENT_UTILS_HPP_
+
+#include "measurement_utils_internal.hpp"
+#include
+
+namespace hailort {
+namespace utils {
+
+// Measures the execution time of a block/function in milliseconds
+class MeasureTime : public MeasureTimeBase<std::milli>
+{
+public:
+    MeasureTime(const std::string &accumulator_name) :
+        MeasureTimeBase::MeasureTimeBase(MeasurementType::TIME, accumulator_name)
+    {}
+
+    template <typename... Args>
+    MeasureTime(const std::string &accumulator_name_format, Args&&... args) :
+        MeasureTime(fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
+
+// Measures the fps of a block/function
+// Using ratio<1,1> so that time measurements will be in seconds (needed for correct fps units)
+class MeasureFps : public MeasureTimeBase<std::ratio<1, 1>>
+{
+public:
+    MeasureFps(const std::string &accumulator_name) :
+        MeasureTimeBase::MeasureTimeBase(MeasurementType::FPS, accumulator_name)
+    {}
+
+    template <typename... Args>
+    MeasureFps(const std::string &accumulator_name_format, Args&&... args) :
+        MeasureFps(fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
+
+// Measures a numeric value
+template <typename T, std::enable_if_t<std::is_arithmetic<T>::value, int> = 0>
+class MeasureValue : public Measure
+{
+public:
+    MeasureValue(T value, const std::string &accumulator_name) :
+        Measure::Measure(MeasurementType::VALUE, accumulator_name)
+    {
+        m_measurement = static_cast<double>(value);
+    }
+
+    template <typename... Args>
+    MeasureValue(T value, const std::string &accumulator_name_format, Args&&... args) :
+        MeasureValue(value, fmt::format(accumulator_name_format, std::forward<Args>(args)...))
+    {}
+};
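// Editorial sketch (not part of the original change): how recorded measurements might be read back
// programmatically, using the MEASURE_TIME macro defined below. MeasurementStorage, MeasurementType and
// get_measurements are declared in measurement_utils_internal.hpp; the accumulator name "my_loop" is a
// hypothetical example.
//
//     void report_loop_time()
//     {
//         for (int i = 0; i < 100; i++) {
//             MEASURE_TIME("my_loop"); // one data point is added when the instance goes out of scope
//             // ... the work being timed ...
//         }
//         auto results = MeasurementStorage::get_measurements(MeasurementType::TIME, "my_loop");
//         if (results) {
//             // results.value() is an AccumulatorResults snapshot (count, mean, etc.)
//         }
//     }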
+
+// TODO: The helper macros are only available for GCC because of ##__VA_ARGS__ support (HRT-13031)
+#ifdef __GNUC__
+#define _CONCAT_HELPER(x, y) x##y
+#define _CONCAT(x, y) _CONCAT_HELPER(x, y)
+
+// Helper macro for measuring the execution time of a block/function
+// Note: An instance with a unique name will be created (__time_<line>), so that:
+// a) the measurements will be completed at the end of the scope
+// b) name shadowing will be avoided
+#define MEASURE_TIME(accumulator_name_format, ...) \
+    hailort::utils::MeasureTime _CONCAT(__time_, __LINE__)(accumulator_name_format, ##__VA_ARGS__)
+
+// Helper macro for measuring fps of a block/function
+// Note: An instance with a unique name will be created (__time_<line>), so that:
+// a) the measurements will be completed at the end of the scope
+// b) name shadowing will be avoided
+#define MEASURE_FPS(accumulator_name_format, ...) \
+    hailort::utils::MeasureFps _CONCAT(__time_, __LINE__)(accumulator_name_format, ##__VA_ARGS__)
+
+// Helper macro for measuring a numeric value
+// Note: The accumulator's name is the stringified variable name together with accumulator_name_format.
+// E.g. calling MEASURE_VALUE(hw_batch_size, "core_op_handle={}", core_op_handle) with core_op_handle=0 will
+// yield the accumulator name "hw_batch_size (core_op_handle=0)".
+// Note: The MeasureValue instances created here are temporary. Unlike MeasureTime and MeasureFps,
+// we measure the value right away and not at the end of a scope.
+#define MEASURE_VALUE(value, accumulator_name_format, ...) \
+    hailort::utils::MeasureValue((value), #value " (" accumulator_name_format ")", ##__VA_ARGS__)
+
+#endif /* __GNUC__ */
+
+} /* namespace utils */
+} /* namespace hailort */
+
+#endif /* _HAILO_MEASUREMENT_UTILS_HPP_ */
diff --git a/hailort/libhailort/src/utils/measurement_utils_internal.hpp b/hailort/libhailort/src/utils/measurement_utils_internal.hpp
new file mode 100644
index 00000000..9ca28748
--- /dev/null
+++ b/hailort/libhailort/src/utils/measurement_utils_internal.hpp
@@ -0,0 +1,137 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) +**/ +/** + * @file measurement_utils_internal.hpp + * @brief Internal class definitions for the measurement_utils module + **/ + +#ifndef _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ +#define _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ + +#include "hailo/hailort.h" +#include "utils/hailort_logger.hpp" +#include "common/runtime_statistics_internal.hpp" + +#include +#include +#include +#include + + +namespace hailort { +namespace utils { + +enum class MeasurementType +{ + TIME, + FPS, + VALUE +}; + +class MeasurementStorage final +{ +public: + // Adds a 'type' measurement to the 'accumulator_name' accumulator; thread-safe + static hailo_status add_measurement(const std::string &accumulator_name, MeasurementType type, double measurement); + static Expected get_measurements(MeasurementType type, const std::string &accumulator_name); + // Not thread-safe + static void set_verbosity(bool verbosity); + static void set_precision(uint32_t precision); + static void clear(); + static void show_output_on_destruction(bool show_output); + + ~MeasurementStorage(); + +private: + struct AccumulatorMap { + std::mutex mutex; + std::unordered_map map; + }; + + static MeasurementStorage& get_instance(); + static std::string indent_string(const std::string &str, uint8_t indent_level); + + AccumulatorMap &get_storage(MeasurementType type); + std::vector> get_sorted_elements(MeasurementType type); + std::string get_measurement_title(MeasurementType type); + void format_measurements(std::ostream &output_stream, MeasurementType type); + hailo_status add_measurement_impl(const std::string &accumulator_name, MeasurementType type, double measurement); + Expected get_measurements_impl(MeasurementType type, const std::string &accumulator_name); + void set_verbosity_impl(bool verbosity); + void set_precision_impl(uint32_t precision); + void clear_impl(); + void show_output_on_destruction_impl(bool show_output); + + bool m_verbose = false; + uint32_t m_precision = AccumulatorResultsHelper::DEFAULT_FLOATING_POINT_PRECISION; + bool m_show_output_on_destruction = true; + AccumulatorMap m_time_acc_storage; + AccumulatorMap m_fps_acc_storage; + AccumulatorMap m_value_acc_storage; +}; + +class Measure +{ +public: + virtual ~Measure() + { + const auto status = MeasurementStorage::add_measurement(m_accumulator_name, m_type, m_measurement); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed adding data point to {}", m_accumulator_name); + } + } + + Measure(Measure &&) = delete; + Measure(const Measure &) = delete; + Measure &operator=(Measure &&) = delete; + Measure &operator=(const Measure &) = delete; + +protected: + // The measurement will be added to the accumulator named m_accumulator_name in the dtor + double m_measurement; + + Measure(MeasurementType type, const std::string &accumulator_name) : + m_measurement(), + m_type(type), + m_accumulator_name(accumulator_name) + {} + +private: + const MeasurementType m_type; + const std::string m_accumulator_name; +}; + +template +class MeasureTimeBase : public Measure +{ +public: + virtual ~MeasureTimeBase() + { + // Set the measurement to the time delta + m_measurement = convert_to_double(std::chrono::steady_clock::now() - m_start_time); + } + +protected: + MeasureTimeBase(MeasurementType type, const std::string &accumulator_name) : + Measure::Measure(type, accumulator_name), + m_start_time(std::chrono::steady_clock::now()) + {} + +private: + using time_point = decltype(std::chrono::steady_clock::now()); + + 
static double convert_to_double(std::chrono::nanoseconds time_in_ns) + { + return std::chrono::duration(time_in_ns).count(); + } + + // Must be the last member declared, so that the time will be measured correctly + const time_point m_start_time; +}; + +} /* namespace utils */ +} /* namespace hailort */ + +#endif /* _HAILO_MEASUREMENT_UTILS_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp index 406a8114..7b82ef6f 100644 --- a/hailort/libhailort/src/utils/profiler/handler.hpp +++ b/hailort/libhailort/src/utils/profiler/handler.hpp @@ -52,10 +52,20 @@ struct AddDeviceTrace : Trace struct MonitorStartTrace : Trace { - MonitorStartTrace() - : Trace("scheduler_start") + MonitorStartTrace(const std::string &unique_vdevice_hash) + : Trace("scheduler_start"), unique_vdevice_hash(unique_vdevice_hash) {} + std::string unique_vdevice_hash; +}; + +struct MonitorEndTrace : Trace +{ + MonitorEndTrace(const std::string &unique_vdevice_hash) + : Trace("scheduler_end"), unique_vdevice_hash(unique_vdevice_hash) + {} + + std::string unique_vdevice_hash; }; struct AddCoreOpTrace : Trace @@ -145,14 +155,30 @@ struct FrameEnqueueD2HTrace : Trace std::string queue_name; }; -struct SwitchCoreOpTrace : Trace +struct ActivateCoreOpTrace : Trace { - SwitchCoreOpTrace(const device_id_t &device_id, scheduler_core_op_handle_t handle) - : Trace("switch_core_op"), device_id(device_id), core_op_handle(handle) + ActivateCoreOpTrace(const device_id_t &device_id, vdevice_core_op_handle_t handle, double duration) + : Trace("activate_core_op"), device_id(device_id), core_op_handle(handle), duration(duration) {} device_id_t device_id; - scheduler_core_op_handle_t core_op_handle; + vdevice_core_op_handle_t core_op_handle; + double duration; +}; + +// Currently, activate and switch are the same trace to make scheduler and fast-switch flow similar (although in the +// scheduler we have no deactivate). 
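// Editorial sketch (an assumption, not taken from this diff): with the alias below, existing scheduler
// call sites keep constructing SwitchCoreOpTrace unchanged, while fast-switch code can name the trace
// explicitly, e.g.:
//     ActivateCoreOpTrace trace(device_id, core_op_handle, /*duration=*/0.35);
// Both spellings create the same trace type; the `duration` field (a plain double in this diff) is the
// new piece of information.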
+using SwitchCoreOpTrace = ActivateCoreOpTrace; + +struct DeactivateCoreOpTrace : Trace +{ + DeactivateCoreOpTrace(const device_id_t &device_id, vdevice_core_op_handle_t handle, double duration) + : Trace("deactivate_core_op"), device_id(device_id), core_op_handle(handle), duration(duration) + {} + + device_id_t device_id; + vdevice_core_op_handle_t core_op_handle; + double duration; }; struct SetCoreOpTimeoutTrace : Trace @@ -200,6 +226,19 @@ struct OracleDecisionTrace : Trace bool over_timeout; }; +struct HefLoadedTrace : Trace +{ + HefLoadedTrace(const std::string &hef_name, const std::string &dfc_version, const unsigned char *md5_hash) + : Trace("hef_loaded"), hef_name(hef_name), dfc_version(dfc_version) + { + std::memcpy(this->md5_hash, md5_hash, MD5_DIGEST_LENGTH); + } + + std::string hef_name; + std::string dfc_version; + MD5_SUM_t md5_hash; +}; + struct DumpProfilerStateTrace : Trace { DumpProfilerStateTrace() : Trace("dump_profiler_state") {} @@ -218,8 +257,10 @@ class Handler virtual void handle_trace(const FrameDequeueH2DTrace&) {}; virtual void handle_trace(const FrameDequeueD2HTrace&) {}; virtual void handle_trace(const FrameEnqueueD2HTrace&) {}; - virtual void handle_trace(const SwitchCoreOpTrace&) {}; + virtual void handle_trace(const ActivateCoreOpTrace&) {}; + virtual void handle_trace(const DeactivateCoreOpTrace&) {}; virtual void handle_trace(const MonitorStartTrace&) {}; + virtual void handle_trace(const MonitorEndTrace&) {}; virtual void handle_trace(const AddDeviceTrace&) {}; virtual void handle_trace(const SetCoreOpTimeoutTrace&) {}; virtual void handle_trace(const SetCoreOpThresholdTrace&) {}; @@ -227,6 +268,7 @@ class Handler virtual void handle_trace(const OracleDecisionTrace&) {}; virtual void handle_trace(const DumpProfilerStateTrace&) {}; virtual void handle_trace(const InitProfilerProtoTrace&) {}; + virtual void handle_trace(const HefLoadedTrace&) {}; }; diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp index 25d6c72b..598bb905 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp @@ -37,8 +37,14 @@ void MonitorHandler::clear_monitor() { void MonitorHandler::handle_trace(const MonitorStartTrace &trace) { - (void)trace; - start_mon(); + start_mon(trace.unique_vdevice_hash); +} + +void MonitorHandler::handle_trace(const MonitorEndTrace &trace) +{ + if (m_unique_vdevice_hash == trace.unique_vdevice_hash) { + m_unique_vdevice_hash = {}; + } } void MonitorHandler::handle_trace(const AddCoreOpTrace &trace) @@ -53,19 +59,19 @@ void MonitorHandler::handle_trace(const AddDeviceTrace &trace) m_devices_info.emplace(trace.device_id, device_info); } -void MonitorHandler::handle_trace(const SwitchCoreOpTrace &trace) +void MonitorHandler::handle_trace(const ActivateCoreOpTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. 
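// Editorial note: the guarded early returns below (used throughout this file) replace the old hard
// asserts, since a trace may reference a device or core-op that this monitor instance doesn't track:
//     if (!contains(m_devices_info, trace.device_id)) { return; }
// Proper multi-vdevice bookkeeping is deferred to HRT-8835.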
if (!m_is_monitor_currently_working) { return; } - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_devices_info, trace.device_id)) { return; } // TODO (HRT-8835): Support multiple vdevices m_devices_info.at(trace.device_id).current_core_op_handle = trace.core_op_handle; } void MonitorHandler::handle_trace(const AddStreamH2DTrace &trace) { auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); - assert(contains(m_core_ops_info, core_op_handle)); - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_core_ops_info, core_op_handle)) { return; } // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return; } // TODO (HRT-8835): Support multiple vdevices m_core_ops_info[core_op_handle].input_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size}; if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, core_op_handle)) { m_devices_info.at(trace.device_id).requested_transferred_frames_h2d.emplace(core_op_handle, make_shared_nothrow()); @@ -76,8 +82,8 @@ void MonitorHandler::handle_trace(const AddStreamH2DTrace &trace) void MonitorHandler::handle_trace(const AddStreamD2HTrace &trace) { auto core_op_handle = get_core_op_handle_by_name(trace.core_op_name); - assert(contains(m_core_ops_info, core_op_handle)); - assert(contains(m_devices_info, trace.device_id)); + if (!contains(m_core_ops_info, core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices m_core_ops_info[core_op_handle].output_streams_info[trace.stream_name] = StreamsInfo{trace.queue_size}; if (!contains(m_devices_info.at(trace.device_id).finished_transferred_frames_d2h, core_op_handle)) { m_devices_info.at(trace.device_id).finished_transferred_frames_d2h.emplace(core_op_handle, make_shared_nothrow()); @@ -87,8 +93,8 @@ void MonitorHandler::handle_trace(const AddStreamD2HTrace &trace) void MonitorHandler::handle_trace(const FrameEnqueueH2DTrace &trace) { - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_add(1); queue.pending_frames_count_acc->add_data_point(queue.pending_frames_count->load()); @@ -96,8 +102,8 @@ void MonitorHandler::handle_trace(const FrameEnqueueH2DTrace &trace) void MonitorHandler::handle_trace(const FrameDequeueD2HTrace &trace) { - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_sub(1); queue.pending_frames_count_acc->add_data_point(queue.pending_frames_count->load()); @@ -108,11 +114,11 @@ void 
MonitorHandler::handle_trace(const FrameEnqueueD2HTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. if (!m_is_monitor_currently_working) { return; } - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].output_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices - assert(contains(m_devices_info, trace.device_id)); - assert(contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)); + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].output_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_add(1); @@ -131,10 +137,10 @@ void MonitorHandler::handle_trace(const FrameDequeueH2DTrace &trace) { // TODO: 'if' should be removed, this is temporary solution since this trace is called out of the scheduler or vdevice. if (!m_is_monitor_currently_working) { return; } - assert(contains(m_core_ops_info, trace.core_op_handle)); - assert(contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)); - assert(contains(m_devices_info, trace.device_id)); - assert(contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)); + if (!contains(m_core_ops_info, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_core_ops_info[trace.core_op_handle].input_streams_info, trace.queue_name)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info, trace.device_id)) { return ;} // TODO (HRT-8835): Support multiple vdevices + if (!contains(m_devices_info.at(trace.device_id).requested_transferred_frames_h2d, trace.core_op_handle)) { return ;} // TODO (HRT-8835): Support multiple vdevices auto &queue = m_core_ops_info[trace.core_op_handle].input_streams_info[trace.queue_name]; queue.pending_frames_count->fetch_sub(1); @@ -155,15 +161,21 @@ scheduler_core_op_handle_t MonitorHandler::get_core_op_handle_by_name(const std: return INVALID_CORE_OP_HANDLE; } -hailo_status MonitorHandler::start_mon() +hailo_status MonitorHandler::start_mon(const std::string &unique_vdevice_hash) { #if defined(__GNUC__) /* Clearing monitor members. Since the owner of monitor_handler is tracer, which is static, the monitor may get rerun without destructor being called. */ if (m_is_monitor_currently_working) { + if (!m_unique_vdevice_hash.empty() && (unique_vdevice_hash != m_unique_vdevice_hash)) { + LOGGER__WARNING("Trying to register a vdevice to hailo-monitor, "\ + "while other vdevice is registered. 
Monitor currently supports a single vdevice, which will result in inconsistent tracing.");
+            return HAILO_INVALID_OPERATION;
+        }
        clear_monitor();
    }
+    m_unique_vdevice_hash = unique_vdevice_hash;
    m_is_monitor_currently_working = true;
    auto event_exp = Event::create_shared(Event::State::not_signalled);
@@ -193,6 +205,7 @@ hailo_status MonitorHandler::start_mon()
     return HAILO_SUCCESS;
 #else
+    (void)unique_vdevice_hash;
     return HAILO_NOT_IMPLEMENTED;
 #endif
 }
diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
index a62498db..5ae124de 100644
--- a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
+++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp
@@ -31,6 +31,7 @@
 #pragma warning(disable: 4244 4267 4127)
 #else
 #pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-parameter"
 #pragma GCC diagnostic ignored "-Wconversion"
 #endif
 #include "scheduler_mon.pb.h"
@@ -59,25 +60,25 @@ class SchedulerCounter
     void insert(const stream_name_t &name)
     {
-        assert(!contains(m_map, name));
+        if (contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         m_map[name] = 0;
     }

     uint32_t operator[](const stream_name_t &name) const
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return 0; } // TODO (HRT-8835): Support multiple vdevices
         return m_map.at(name);
     }

     void increase(const stream_name_t &name)
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         m_map[name]++;
     }

     void decrease(const stream_name_t &name)
     {
-        assert(contains(m_map, name));
+        if (!contains(m_map, name)) { return; } // TODO (HRT-8835): Support multiple vdevices
         assert(m_map[name] > 0);
         m_map[name]--;
     }
@@ -171,12 +172,13 @@ class MonitorHandler : public Handler
     virtual void handle_trace(const FrameDequeueD2HTrace&) override;
     virtual void handle_trace(const FrameDequeueH2DTrace&) override;
     virtual void handle_trace(const FrameEnqueueD2HTrace&) override;
-    virtual void handle_trace(const SwitchCoreOpTrace&) override;
+    virtual void handle_trace(const ActivateCoreOpTrace&) override;
     virtual void handle_trace(const MonitorStartTrace&) override;
+    virtual void handle_trace(const MonitorEndTrace&) override;
     virtual void handle_trace(const AddDeviceTrace&) override;

 private:
-    hailo_status start_mon();
+    hailo_status start_mon(const std::string &unique_vdevice_hash);
 #if defined(__GNUC__)
     Expected> open_temp_mon_file();
     void dump_state();
@@ -204,6 +206,7 @@ class MonitorHandler : public Handler
     // TODO: Consider adding Accumulator classes for more info (min, max, mean, etc..)
     std::unordered_map m_core_ops_info;
     std::unordered_map m_devices_info;
+    std::string m_unique_vdevice_hash; // Only one vdevice is allowed at a time; the vdevice will be unregistered on its destruction.
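    // Editorial sketch of the intended lifecycle, reconstructed from the handlers above (not stated
    // explicitly in the diff):
    //     MonitorStartTrace{hash}  -> start_mon(hash) stores m_unique_vdevice_hash and starts the monitor
    //     MonitorStartTrace{hash2} -> rejected with HAILO_INVALID_OPERATION while the first hash is registered
    //     MonitorEndTrace{hash}    -> clears m_unique_vdevice_hash, letting the next vdevice register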
}; } diff --git a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp index de28bd71..daf284ed 100644 --- a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp +++ b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp @@ -19,6 +19,12 @@ namespace hailort { +#define PCIE_GEN1_SPEED "2.5GT/s" +#define PCIE_GEN2_SPEED "5GT/s" +#define PCIE_GEN3_SPEED "8GT/s" +#define PCIE_GEN4_SPEED "16GT/s" +#define PCIE_GEN5_SPEED "32GT/s" +#define PCIE_GEN6_SPEED "64GT/s" struct ProfilerTime { uint32_t year; @@ -29,6 +35,13 @@ struct ProfilerTime { int64_t time_since_epoch; }; +struct pci_info { + std::string gen; + std::string lanes; + + pci_info() : gen("N/A"), lanes("N/A") {} +}; + #if defined(__linux__) std::string os_name() { @@ -71,6 +84,79 @@ std::uint64_t system_ram_size() return sys_info.totalram; } + +std::string exec(const char *cmd) { + const int buffer_size = 128; + std::array buffer; + std::string result; + std::shared_ptr pipe(popen(cmd, "r"), pclose); + + if (!pipe) { + LOGGER__WARNING("Couldn't execute {}, popen() failed!", cmd); + return ""; + } + + while (!feof(pipe.get())) { + if (fgets(buffer.data(), buffer_size, pipe.get()) != nullptr) { + result += buffer.data(); + } + } + + return result; +} + +pci_info parse_lspci_output(const std::string &output) { + std::istringstream lspci_stream(output); + pci_info pcie_info = {}; + std::string line; + bool in_hailo_section = false; + int hailo_device_count = 0; + + while (std::getline(lspci_stream, line)) { + // Sample output line: "LnkCap: Port #0, Speed 8GT/s, Width x8, ASPM L0s L1, Exit Latency L0s <256ns, L1 <4us" + if (line.find("Co-processor: Hailo") != std::string::npos) { + in_hailo_section = true; + hailo_device_count++; + // TODO: HRT-8834/8835 Support multiple Hailo devices connected to the same host + if (1 < hailo_device_count) { + pcie_info.gen = "N/A"; + pcie_info.lanes = "N/A"; + return pcie_info; + } + } + if (!in_hailo_section) { + continue; + } + if (line.find("LnkCap") != std::string::npos) { + std::istringstream line_stream(line); + std::string token; + while (line_stream >> token) { + if ("Speed" == token) { + line_stream >> token; + if (!token.empty() && token.back() == ',') { + token.pop_back(); + } + if (PCIE_GEN1_SPEED == token) { pcie_info.gen = "1"; } + else if (PCIE_GEN2_SPEED == token) { pcie_info.gen = "2"; } + else if (PCIE_GEN3_SPEED == token) { pcie_info.gen = "3"; } + else if (PCIE_GEN4_SPEED == token) { pcie_info.gen = "4"; } + else if (PCIE_GEN5_SPEED == token) { pcie_info.gen = "5"; } + else if (PCIE_GEN6_SPEED == token) { pcie_info.gen = "6"; } + } + if ("Width" == token) { + line_stream >> token; + pcie_info.lanes = token.substr(1); + } + } + } + } + return pcie_info; +} + +pci_info get_pcie_info() { + std::string lspci_output = exec("lspci -vvv"); + return parse_lspci_output(lspci_output); +} #endif ProfilerTime get_curr_time() diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp index bc4f1710..c3c56abc 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp @@ -143,6 +143,14 @@ void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace) init->set_os_ver(os_ver()); init->set_cpu_arch(cpu_arch()); init->set_sys_ram_size(system_ram_size()); + if (0 == geteuid()) { + auto pcie_info = get_pcie_info(); + 
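        // Editorial note: for the sample LnkCap line quoted in parse_lspci_output above
        // ("Speed 8GT/s, Width x8"), get_pcie_info() would report gen "3" (8GT/s matches
        // PCIE_GEN3_SPEED) and take the lane count from the token following "Width". lspci prints
        // the LnkCap details only with root privileges, which is why this branch is guarded by
        // geteuid() and the else-branch below falls back to an explanatory string.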
init->mutable_pcie_info()->set_gen(pcie_info.gen); + init->mutable_pcie_info()->set_lanes(pcie_info.lanes); + } else { + init->mutable_pcie_info()->set_gen("Failed fetching info, root privilege is required"); + init->mutable_pcie_info()->set_lanes("Failed fetching info, root privilege is required"); + } #endif init->set_hailort_ver(get_libhailort_version_representation()); init->mutable_time()->set_day(curr_time.day); @@ -154,6 +162,17 @@ void SchedulerProfilerHandler::handle_trace(const InitProfilerProtoTrace &trace) init->set_time_stamp_since_epoch(curr_time.time_since_epoch); } +void SchedulerProfilerHandler::handle_trace(const HefLoadedTrace &trace) +{ + std::lock_guard lock(m_proto_lock); + + auto added_trace = m_profiler_trace_proto.add_added_trace(); + added_trace->mutable_loaded_hef()->set_hef_md5(reinterpret_cast(trace.md5_hash)); + added_trace->mutable_loaded_hef()->set_hef_name(trace.hef_name); + added_trace->mutable_loaded_hef()->set_dfc_version(trace.dfc_version); + added_trace->mutable_loaded_hef()->set_time_stamp(trace.timestamp); +} + void SchedulerProfilerHandler::handle_trace(const AddCoreOpTrace &trace) { log(JSON({ @@ -297,7 +316,7 @@ void SchedulerProfilerHandler::handle_trace(const FrameEnqueueD2HTrace &trace) added_trace->mutable_frame_enqueue()->set_time_stamp(trace.timestamp); } -void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) +void SchedulerProfilerHandler::handle_trace(const ActivateCoreOpTrace &trace) { log(JSON({ {"action", json_to_string(trace.name)}, @@ -308,9 +327,20 @@ void SchedulerProfilerHandler::handle_trace(const SwitchCoreOpTrace &trace) std::lock_guard lock(m_proto_lock); auto added_trace = m_profiler_trace_proto.add_added_trace(); - added_trace->mutable_switched_core_op()->set_device_id(trace.device_id); - added_trace->mutable_switched_core_op()->set_new_core_op_handle(trace.core_op_handle); - added_trace->mutable_switched_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_activate_core_op()->set_device_id(trace.device_id); + added_trace->mutable_activate_core_op()->set_new_core_op_handle(trace.core_op_handle); + added_trace->mutable_activate_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_activate_core_op()->set_duration(trace.duration); +} + +void SchedulerProfilerHandler::handle_trace(const DeactivateCoreOpTrace &trace) +{ + std::lock_guard lock(m_proto_lock); + auto added_trace = m_profiler_trace_proto.add_added_trace(); + added_trace->mutable_deactivate_core_op()->set_device_id(trace.device_id); + added_trace->mutable_deactivate_core_op()->set_core_op_handle(trace.core_op_handle); + added_trace->mutable_deactivate_core_op()->set_time_stamp(trace.timestamp); + added_trace->mutable_deactivate_core_op()->set_duration(trace.duration); } void SchedulerProfilerHandler::handle_trace(const SetCoreOpTimeoutTrace &trace) diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp index 358d06f0..81924df9 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp @@ -46,7 +46,8 @@ class SchedulerProfilerHandler : public Handler virtual void handle_trace(const FrameDequeueH2DTrace&) override; virtual void handle_trace(const FrameDequeueD2HTrace&) override; virtual void handle_trace(const FrameEnqueueD2HTrace&) override; - virtual void handle_trace(const SwitchCoreOpTrace&) override; + virtual void 
handle_trace(const ActivateCoreOpTrace&) override;
+    virtual void handle_trace(const DeactivateCoreOpTrace&) override;
     virtual void handle_trace(const AddDeviceTrace&) override;
     virtual void handle_trace(const SetCoreOpTimeoutTrace&) override;
     virtual void handle_trace(const SetCoreOpThresholdTrace&) override;
@@ -54,6 +55,7 @@ class SchedulerProfilerHandler : public Handler
     virtual void handle_trace(const OracleDecisionTrace&) override;
     virtual void handle_trace(const DumpProfilerStateTrace&) override;
     virtual void handle_trace(const InitProfilerProtoTrace&) override;
+    virtual void handle_trace(const HefLoadedTrace&) override;

 private:
     void log(JSON json);
diff --git a/hailort/libhailort/src/utils/shared_resource_manager.hpp b/hailort/libhailort/src/utils/shared_resource_manager.hpp
index afcad3a2..a40c8cb5 100644
--- a/hailort/libhailort/src/utils/shared_resource_manager.hpp
+++ b/hailort/libhailort/src/utils/shared_resource_manager.hpp
@@ -101,11 +101,15 @@ class SharedResourceManager
         : m_resources(max_resources())
     {}

-#ifdef _WIN32
-    // On windows, when the process terminates, all threads are and only then the static variable are destroyed.
-    // If the user hasn't called release_resource, we will leak its objects (since otherwise the object destructor may
-    // wait on some terminated threads and hang).
-    // Notice that on graceful cleanup m_resources should be empty.
+    // On graceful process cleanup, the destructor of this class will be called, and m_resources should be an
+    // empty list (since all resources were released). If that is not the case (for example, the user called
+    // ExitProcess), we don't want to release the objects - we just leak them. It is OK to leak the objects,
+    // since the user didn't call release_resource (so what would they expect us to do?).
+    // It is also important to leak the memory, because we may not be able to free the objects while the
+    // process is being destructed:
+    // 1. On Windows, for example, the static variables are destroyed *after* the threads are stopped.
+    //    Some shared resources wait for their threads to do something, and they can get stuck forever.
+    // 2. The destruction of one object may rely on the destruction of another singleton object.
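    // Editorial sketch (an assumption, not from this change) of the hazard described in point 1 above:
    //     struct Resource { std::thread worker; ~Resource() { worker.join(); } };
    // If such a resource is still registered when static destruction runs after an abrupt exit on
    // Windows, its worker thread has already been terminated, so join() can block forever - hence the
    // leak-instead-of-destroy behavior of the destructor below.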
~SharedResourceManager() { for (auto &resource : m_resources) { @@ -113,7 +117,6 @@ class SharedResourceManager resource.release(); } } -#endif /* _WIN32 */ static uint32_t max_resources() { diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp index e5953f34..fff74272 100644 --- a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp @@ -116,18 +116,11 @@ Expected PartialClusterReader::get_partial_clusters_layout_bitmap(hail // If file does not exist - get default values for dev_arch if (!Filesystem::does_file_exists(std::string(PARTIAL_CLUSTER_READER_CLUSTER_LAYOUT_FILE_PATH))) { LOGGER__INFO("partial cluster layout bitmap file not found, Enabling all clusters by default"); - auto default_bitmap_exp = get_arch_default_bitmap(dev_arch); - CHECK_EXPECTED(default_bitmap_exp); - fuse_file_data.first = default_bitmap_exp.release(); - - auto sku_value_exp = get_sku_value_from_arch(dev_arch); - CHECK_EXPECTED(sku_value_exp); - fuse_file_data.second = sku_value_exp.release(); + TRY(fuse_file_data.first, get_arch_default_bitmap(dev_arch)); + TRY(fuse_file_data.second, get_sku_value_from_arch(dev_arch)); } else { // This will read bitmap and verify with SKU value - auto fuse_file_exp = read_fuse_file(); - CHECK_EXPECTED(fuse_file_exp); - fuse_file_data = fuse_file_exp.release(); + TRY(fuse_file_data, read_fuse_file()); } const auto sku_value = fuse_file_data.second; @@ -155,10 +148,7 @@ Expected PartialClusterReader::get_actual_dev_arch_ && (HAILO_ARCH_HAILO15H == fw_dev_arch)) { return HAILO_ARCH_HAILO15H; } else { - auto fuse_file_exp = read_fuse_file(); - CHECK_EXPECTED(fuse_file_exp); - const auto fuse_file_data = fuse_file_exp.release(); - + TRY(const auto fuse_file_data, read_fuse_file()); const auto sku_value = fuse_file_data.second; if (HAILO15M_SKU_VALUE == sku_value) { return HAILO_ARCH_HAILO15M; diff --git a/hailort/libhailort/src/utils/thread_safe_queue.hpp b/hailort/libhailort/src/utils/thread_safe_queue.hpp index f7dfe6f0..cd244485 100644 --- a/hailort/libhailort/src/utils/thread_safe_queue.hpp +++ b/hailort/libhailort/src/utils/thread_safe_queue.hpp @@ -137,13 +137,13 @@ class SpscQueue // +1 for each dequeued item // -1 for each enqueued item // Blocks when the queue is full (which happens when it's value reaches zero, hence it starts at queue size) - const auto items_enqueued_sema = Semaphore::create_shared(0); - CHECK_AS_EXPECTED(nullptr != items_enqueued_sema, HAILO_OUT_OF_HOST_MEMORY, "Failed creating items_enqueued_sema semaphore"); + auto items_enqueued_sema = Semaphore::create_shared(0); + CHECK_EXPECTED(items_enqueued_sema, "Failed creating items_enqueued_sema semaphore"); - const auto items_dequeued_sema = Semaphore::create_shared(static_cast(max_size)); - CHECK_AS_EXPECTED(nullptr != items_dequeued_sema, HAILO_OUT_OF_HOST_MEMORY, "Failed creating items_dequeued_sema semaphore"); + auto items_dequeued_sema = Semaphore::create_shared(static_cast(max_size)); + CHECK_EXPECTED(items_dequeued_sema, "Failed creating items_dequeued_sema semaphore"); - return SpscQueue(max_size, items_enqueued_sema, items_dequeued_sema, shutdown_event, default_timeout); + return SpscQueue(max_size, items_enqueued_sema.release(), items_dequeued_sema.release(), shutdown_event, default_timeout); } static std::shared_ptr create_shared(size_t max_size, const EventPtr& shutdown_event, @@ -210,9 +210,15 @@ class SpscQueue return 
dequeue(m_default_timeout); } - hailo_status enqueue(const T& result, std::chrono::milliseconds timeout) AE_NO_TSAN + hailo_status enqueue(const T& result, std::chrono::milliseconds timeout, bool ignore_shutdown_event = false) AE_NO_TSAN { - const auto wait_result = m_items_dequeued_sema_or_shutdown.wait(timeout); + hailo_status wait_result = HAILO_UNINITIALIZED; + if (ignore_shutdown_event) { + wait_result = m_items_dequeued_sema->wait(timeout); + } else { + wait_result = m_items_dequeued_sema_or_shutdown.wait(timeout); + } + if (HAILO_SHUTDOWN_EVENT_SIGNALED == wait_result) { LOGGER__TRACE("Shutdown event has been signaled"); return wait_result; @@ -234,9 +240,9 @@ class SpscQueue return m_items_enqueued_sema_or_shutdown.signal(); } - inline hailo_status enqueue(const T& result) AE_NO_TSAN + inline hailo_status enqueue(const T& result, bool ignore_shutdown_event = false) AE_NO_TSAN { - return enqueue(result, m_default_timeout); + return enqueue(result, m_default_timeout, ignore_shutdown_event); } // TODO: Do away with two copies of this function? (SDK-16481) diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp index a2d0eaa1..1f2a7b0e 100644 --- a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp @@ -69,7 +69,7 @@ hailo_status InferRequestAccumulator::shutdown(std::chrono::milliseconds timeout // Now cancel all partial request for (auto &partial_request : m_partial_infer_requests) { for (auto &stream_transfer_request : partial_request) { - stream_transfer_request.second.callback(HAILO_STREAM_ABORTED_BY_USER); + stream_transfer_request.second.callback(HAILO_STREAM_ABORT); } } m_partial_infer_requests.clear(); diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp index e11272f0..08b99913 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp @@ -32,7 +32,7 @@ using core_op_priority_t = uint8_t; constexpr const uint16_t SINGLE_CONTEXT_BATCH_SIZE = 1; class VDeviceCoreOp; - +class VdmaConfigCoreOp; class ScheduledCoreOp { diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp index a745f9d4..1c71752b 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp @@ -21,6 +21,7 @@ namespace hailort /** Input stream **/ Expected> ScheduledInputStream::create( + VDevice &vdevice, std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, @@ -35,7 +36,7 @@ Expected> ScheduledInputStream::create( } auto status = HAILO_UNINITIALIZED; - auto local_vdevice_stream = make_unique_nothrow(std::move(streams), core_op_handle, + auto local_vdevice_stream = make_unique_nothrow(vdevice, std::move(streams), core_op_handle, std::move(core_op_activated_event), layer_info, std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(local_vdevice_stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -51,10 +52,12 @@ hailo_stream_interface_t ScheduledInputStream::get_interface() const Expected> ScheduledInputStream::allocate_buffer_pool() { - auto queued_pool = 
QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - return std::unique_ptr(queued_pool.release()); + TRY(auto queued_pool, QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma())); + + CHECK_SUCCESS(queued_pool->dma_map(m_vdevice, HAILO_DMA_BUFFER_DIRECTION_H2D)); + + return std::unique_ptr(std::move(queued_pool)); } size_t ScheduledInputStream::get_max_ongoing_transfers() const @@ -81,6 +84,7 @@ hailo_status ScheduledInputStream::write_async_impl(TransferRequest &&transfer_r /** Output stream **/ Expected> ScheduledOutputStream::create( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -96,7 +100,7 @@ Expected> ScheduledOutputStream::create( auto status = HAILO_UNINITIALIZED; - auto stream = make_unique_nothrow(std::move(streams), core_op_handle, + auto stream = make_unique_nothrow(vdevice, std::move(streams), core_op_handle, layer_info, std::move(core_op_activated_event), std::move(infer_requests_accumulator), status); CHECK_NOT_NULL_AS_EXPECTED(stream, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -112,10 +116,12 @@ hailo_stream_interface_t ScheduledOutputStream::get_interface() const Expected> ScheduledOutputStream::allocate_buffer_pool() { - auto queued_pool = QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), - BufferStorageParams::create_dma()); - CHECK_EXPECTED(queued_pool); - return std::unique_ptr(queued_pool.release()); + TRY(auto queued_pool, QueuedStreamBufferPool::create(m_infer_requests_accumulator->queue_size(), get_frame_size(), + BufferStorageParams::create_dma())); + + CHECK_SUCCESS(queued_pool->dma_map(m_vdevice, HAILO_DMA_BUFFER_DIRECTION_D2H)); + + return std::unique_ptr(std::move(queued_pool)); } size_t ScheduledOutputStream::get_max_ongoing_transfers() const diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp index e96ddf0e..5666481b 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp @@ -31,6 +31,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { public: static Expected> create( + VDevice &vdevice, std::map> &&streams, const LayerInfo &layer_info, const scheduler_core_op_handle_t &core_op_handle, @@ -38,6 +39,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { std::shared_ptr infer_requests_accumulator); ScheduledInputStream( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, EventPtr &&core_op_activated_event, @@ -45,6 +47,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : AsyncInputStreamBase(layer_info, std::move(core_op_activated_event), status), + m_vdevice(vdevice), m_streams(std::move(streams)), m_core_op_handle(core_op_handle), m_infer_requests_accumulator(infer_requests_accumulator), @@ -61,6 +64,7 @@ class ScheduledInputStream : public AsyncInputStreamBase { virtual bool is_scheduled() override final { return true; }; private: + VDevice &m_vdevice; std::map> m_streams; scheduler_core_op_handle_t m_core_op_handle; std::shared_ptr m_infer_requests_accumulator; @@ -71,6 +75,7 @@ class ScheduledInputStream : public 
AsyncInputStreamBase { class ScheduledOutputStream : public AsyncOutputStreamBase { public: static Expected> create( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -78,6 +83,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { std::shared_ptr infer_requests_accumulator); ScheduledOutputStream( + VDevice &vdevice, std::map> &&streams, const scheduler_core_op_handle_t &core_op_handle, const LayerInfo &layer_info, @@ -85,6 +91,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { std::shared_ptr &&infer_requests_accumulator, hailo_status &status) : AsyncOutputStreamBase(layer_info, std::move(core_op_activated_event), status), + m_vdevice(vdevice), m_streams(std::move(streams)), m_core_op_handle(core_op_handle), m_infer_requests_accumulator(infer_requests_accumulator), @@ -121,6 +128,7 @@ class ScheduledOutputStream : public AsyncOutputStreamBase { private: + VDevice &m_vdevice; std::map> m_streams; scheduler_core_op_handle_t m_core_op_handle; std::shared_ptr m_infer_requests_accumulator; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp index a8163bd6..060d3b23 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp @@ -14,7 +14,6 @@ #include "vdevice/vdevice_core_op.hpp" #include "vdevice/scheduler/scheduler_oracle.hpp" #include "vdma/vdma_config_manager.hpp" -#include "hef/hef_internal.hpp" #include @@ -76,6 +75,7 @@ void CoreOpsScheduler::remove_core_op(scheduler_core_op_handle_t core_op_handle) { std::unique_lock lock(m_scheduler_mutex); m_scheduled_core_ops.at(core_op_handle)->remove_instance(); + m_scheduler_thread.signal(); } void CoreOpsScheduler::shutdown() @@ -124,8 +124,7 @@ hailo_status CoreOpsScheduler::switch_core_op(const scheduler_core_op_handle_t & current_core_op = get_vdma_core_op(curr_device_info->current_core_op_handle, device_id); } - const bool is_batch_switch = (core_op_handle == curr_device_info->current_core_op_handle); - auto status = VdmaConfigManager::switch_core_op(current_core_op, next_core_op, hw_batch_size, is_batch_switch); + auto status = VdmaConfigManager::set_core_op(device_id, current_core_op, next_core_op, hw_batch_size); CHECK_SUCCESS(status, "Failed switching core-op"); } @@ -362,9 +361,13 @@ void CoreOpsScheduler::shutdown_core_op(scheduler_core_op_handle_t core_op_handl auto request = dequeue_infer_request(core_op_handle); assert(request); for (auto &transfer : request->transfers) { - transfer.second.callback(HAILO_STREAM_ABORTED_BY_USER); + transfer.second.callback(HAILO_STREAM_ABORT); } - request->callback(HAILO_STREAM_ABORTED_BY_USER); + + // Before calling infer_callback, we must ensure all stream callbacks were called and released (since the + // user may capture some variables in the callbacks). 
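The ordering constraint in the comment above matters because per-stream callbacks may capture user state (buffers, shared pointers), and the request-level callback is the user's signal that this state is free to reuse. A small self-contained illustration of the shutdown ordering, with hypothetical stand-in types rather than the scheduler's real ones:

#include <cassert>
#include <functional>
#include <memory>
#include <vector>

struct FakeInferRequest {
    std::vector<std::function<void(int)>> transfers; // per-stream callbacks
    std::function<void(int)> callback;               // request-level callback
};

int main()
{
    auto frame = std::make_shared<std::vector<unsigned char>>(1024);

    FakeInferRequest request;
    request.transfers.push_back([frame](int /*status*/) { /* capture keeps `frame` alive */ });
    request.callback = [&request](int /*status*/) {
        // The user may recycle the frame buffer here; that is only safe because
        // the per-stream callbacks (and their captures) are already gone.
        assert(request.transfers.empty());
    };
    frame.reset(); // the user's own reference is long gone by shutdown time

    const int abort_status = -1; // stands in for HAILO_STREAM_ABORT
    for (auto &transfer : request.transfers) {
        transfer(abort_status);  // call every stream callback first...
    }
    request.transfers.clear();   // ...then drop their captured state...
    request.callback(abort_status); // ...and only then fire the final callback
    return 0;
}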
+ request->transfers.clear(); + request->callback(HAILO_STREAM_ABORT); } } @@ -375,7 +378,7 @@ void CoreOpsScheduler::schedule() for (auto &core_op_pair : m_scheduled_core_ops) { auto status = optimize_streaming_if_enabled(core_op_pair.first); if ((HAILO_SUCCESS != status) && - (HAILO_STREAM_ABORTED_BY_USER != status)) { + (HAILO_STREAM_ABORT != status)) { LOGGER__ERROR("optimize_streaming_if_enabled thread failed with status={}", status); } }; diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp index ebcdf092..3d252205 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp @@ -17,6 +17,7 @@ #include "common/filesystem.hpp" #include "utils/thread_safe_map.hpp" +#include "utils/thread_safe_queue.hpp" #include "vdevice/scheduler/scheduled_core_op_state.hpp" #include "vdevice/scheduler/scheduler_base.hpp" diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp index f6bc795e..c51db77e 100644 --- a/hailort/libhailort/src/vdevice/vdevice.cpp +++ b/hailort/libhailort/src/vdevice/vdevice.cpp @@ -24,6 +24,7 @@ #include "network_group/network_group_internal.hpp" #include "net_flow/pipeline/infer_model_internal.hpp" #include "core_op/core_op.hpp" +#include "hef/hef_internal.hpp" #ifdef HAILO_SUPPORT_MULTI_PROCESS #include "service/rpc_client_utils.hpp" @@ -101,21 +102,6 @@ Expected VDevice::create_configure_params(Hef &hef, cons return hef.create_configure_params(stream_interface.release(), network_group_name); } -hailo_status VDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) -{ - (void) address; - (void) size; - (void) direction; - return HAILO_NOT_IMPLEMENTED; -} - -hailo_status VDevice::dma_unmap(void *address, hailo_stream_direction_t direction) -{ - (void) address; - (void) direction; - return HAILO_NOT_IMPLEMENTED; -} - hailo_status VDevice::before_fork() { return HAILO_SUCCESS; @@ -198,13 +184,32 @@ Expected VDeviceHandle::get_default_streams_interface( return vdevice.value()->get_default_streams_interface(); } -Expected> VDeviceHandle::create_infer_model(const std::string &hef_path) +Expected> VDeviceHandle::create_infer_model(const std::string &hef_path, + const std::string &network_name) { auto &manager = SharedResourceManager::get_instance(); auto vdevice = manager.resource_lookup(m_handle); CHECK_EXPECTED(vdevice); - return vdevice.value()->create_infer_model(hef_path); + return vdevice.value()->create_infer_model(hef_path, network_name); +} + +hailo_status VDeviceHandle::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + auto &manager = SharedResourceManager::get_instance(); + auto vdevice = manager.resource_lookup(m_handle); + CHECK_EXPECTED_AS_STATUS(vdevice); + + return vdevice.value()->dma_map(address, size, direction); +} + +hailo_status VDeviceHandle::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + auto &manager = SharedResourceManager::get_instance(); + auto vdevice = manager.resource_lookup(m_handle); + CHECK_EXPECTED_AS_STATUS(vdevice); + + return vdevice.value()->dma_unmap(address, size, direction); } bool VDevice::service_over_ip_mode() @@ -382,8 +387,18 @@ hailo_status VDeviceClient::listener_run_in_thread(VDeviceIdentifier identifier) while (m_is_listener_thread_running) { auto callback_id = client->VDevice_get_callback_id(identifier); - if (callback_id.status() == 
HAILO_SHUTDOWN_EVENT_SIGNALED) { - LOGGER__INFO("Shutdown event was signaled in listener_run_in_thread"); + if (HAILO_SUCCESS != callback_id.status()) { + std::unique_lock lock(m_mutex); + for (auto &ng_ptr_pair : m_network_groups) { + ng_ptr_pair.second->execute_callbacks_on_error(callback_id.status()); + } + if (callback_id.status() == HAILO_SHUTDOWN_EVENT_SIGNALED) { + LOGGER__INFO("Shutdown event was signaled in listener_run_in_thread"); + } else if (callback_id.status() == HAILO_RPC_FAILED) { + LOGGER__ERROR("Lost communication with the service.."); + } else { + LOGGER__ERROR("Failed to get callback_id from listener thread with {}", callback_id.status()); + } break; } CHECK_EXPECTED_AS_STATUS(callback_id); @@ -413,9 +428,8 @@ hailo_status VDeviceClient::finish_listener_thread() Expected>> VDeviceClient::get_physical_devices() const { + // In case of service-over-ip, the returned list will be empty std::vector> devices_refs; - CHECK_AS_EXPECTED(0 < m_devices.size(), HAILO_INVALID_OPERATION, "get_physical_devices() usage is invalid when working with service over IP. In order to use a local service, unset env var {}", HAILORT_SERVICE_ADDRESS_ENV_VAR); - for (auto &device : m_devices) { devices_refs.push_back(*device); } @@ -433,6 +447,26 @@ Expected VDeviceClient::get_default_streams_interface( return m_client->VDevice_get_default_streams_interface(m_identifier); } +hailo_status VDeviceClient::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + // It is ok to do nothing on service, because the buffer is copied anyway to the service. + LOGGER__TRACE("VDevice `dma_map()` is doing nothing on service"); + return HAILO_SUCCESS; +} + +hailo_status VDeviceClient::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) +{ + (void) address; + (void) size; + (void) direction; + // It is ok to do nothing on service, because the buffer is copied anyway to the service. 
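For local (non-service) vdevices these overrides do real work: VDeviceBase::dma_map below forwards the mapping to every physical device, so repeated async transfers on the same user buffer skip the per-transfer mapping cost. A sketch of the intended calling pattern, using only the signatures added in this diff (not a complete program, since it needs a configured vdevice to actually run transfers):

hailo_status run_with_premapped_input(hailort::VDevice &vdevice, void *buffer, size_t size)
{
    // Map once, up front.
    auto status = vdevice.dma_map(buffer, size, HAILO_DMA_BUFFER_DIRECTION_H2D);
    if (HAILO_SUCCESS != status) {
        return status;
    }

    // ... launch any number of async inferences that read from `buffer` ...

    // Unmap with the same address/size/direction triple used for mapping.
    return vdevice.dma_unmap(buffer, size, HAILO_DMA_BUFFER_DIRECTION_H2D);
}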
+ LOGGER__TRACE("VDevice `dma_map()` is doing nothing on service"); + return HAILO_SUCCESS; +} + #endif // HAILO_SUPPORT_MULTI_PROCESS @@ -504,7 +538,9 @@ hailo_status VDeviceBase::validate_params(const hailo_vdevice_params_t ¶ms) Expected> VDeviceBase::create(const hailo_vdevice_params_t ¶ms) { TRACE(InitProfilerProtoTrace); - TRACE(MonitorStartTrace); + auto unique_vdevice_hash = std::to_string(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count()); + TRACE(MonitorStartTrace, unique_vdevice_hash); auto devices_expected = create_devices(params); CHECK_EXPECTED(devices_expected); @@ -541,7 +577,7 @@ Expected> VDeviceBase::create(const hailo_vdevice_p } } - auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(std::move(devices), scheduler_ptr)); + auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(std::move(devices), scheduler_ptr, unique_vdevice_hash)); CHECK_AS_EXPECTED(nullptr != vdevice, HAILO_OUT_OF_HOST_MEMORY); return vdevice; @@ -559,6 +595,7 @@ VDeviceBase::~VDeviceBase() m_core_ops_scheduler->shutdown(); } TRACE(DumpProfilerStateTrace); + TRACE(MonitorEndTrace, m_unique_vdevice_hash); } Expected VDeviceBase::configure(Hef &hef, @@ -630,8 +667,10 @@ Expected VDeviceBase::configure(Hef &hef, return added_network_groups; } -Expected> VDevice::create_infer_model(const std::string &hef_path) +Expected> VDevice::create_infer_model(const std::string &hef_path, const std::string &network_name) { + CHECK_AS_EXPECTED(network_name.empty(), HAILO_NOT_IMPLEMENTED, "Passing network name is not supported yet!"); + auto hef_expected = Hef::create(hef_path); CHECK_EXPECTED(hef_expected); auto hef = hef_expected.release(); @@ -830,7 +869,7 @@ Expected> VDeviceBase::create_vdevice_core_op(Hef auto core_op_handle = allocate_core_op_handle(); - return VDeviceCoreOp::create(m_active_core_op_holder, params.second, physical_core_ops, + return VDeviceCoreOp::create(*this, m_active_core_op_holder, params.second, physical_core_ops, m_core_ops_scheduler, core_op_handle, hef.hash()); } diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp index 153a8850..d0ca5831 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp @@ -18,7 +18,8 @@ namespace hailort { -Expected> VDeviceCoreOp::create(ActiveCoreOpHolder &active_core_op_holder, +Expected> VDeviceCoreOp::create(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, @@ -38,11 +39,17 @@ Expected> VDeviceCoreOp::create(ActiveCoreOpHolde } // On HcpConfigCoreOp, we don't support get_async_max_queue_size (and the core op doesn't use the queue). - auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); - const auto queue_size = per_device_queue_size ? 
(*per_device_queue_size * core_ops.size()) : 0; + size_t queue_size = 0; + auto iface = core_ops.begin()->second->get_default_streams_interface(); + CHECK_EXPECTED(iface); + if ((iface.value() != HAILO_STREAM_INTERFACE_ETH) && (iface.value() != HAILO_STREAM_INTERFACE_MIPI)) { + auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); + CHECK_EXPECTED(per_device_queue_size); + queue_size = *per_device_queue_size * core_ops.size(); + } auto status = HAILO_UNINITIALIZED; - auto vdevice_core_op = make_shared_nothrow(active_core_op_holder, configure_params, + auto vdevice_core_op = make_shared_nothrow(vdevice, active_core_op_holder, configure_params, std::move(core_ops), core_ops_scheduler, core_op_handle, hef_hash, queue_size, status); CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -67,8 +74,8 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt other->m_infer_requests_accumulator->queue_size() : 0; auto status = HAILO_UNINITIALIZED; - auto vdevice_core_op = make_shared_nothrow(other->m_active_core_op_holder, configure_params, - std::move(copy), other->m_core_ops_scheduler, other->m_core_op_handle, + auto vdevice_core_op = make_shared_nothrow(other->m_vdevice, other->m_active_core_op_holder, + configure_params, std::move(copy), other->m_core_ops_scheduler, other->m_core_op_handle, other->m_hef_hash, queue_size, status); CHECK_NOT_NULL_AS_EXPECTED(vdevice_core_op, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); @@ -79,13 +86,15 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt return vdevice_core_op; } -VDeviceCoreOp::VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, +VDeviceCoreOp::VDeviceCoreOp(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, vdevice_core_op_handle_t core_op_handle, const std::string &hef_hash, size_t max_queue_size, hailo_status &status) : CoreOp(configure_params, core_ops.begin()->second->m_metadata, active_core_op_holder, status), + m_vdevice(vdevice), m_core_ops(std::move(core_ops)), m_core_ops_scheduler(core_ops_scheduler), m_core_op_handle(core_op_handle), @@ -191,7 +200,7 @@ hailo_status VDeviceCoreOp::create_input_vdevice_stream_from_config_params(const if (m_core_ops_scheduler.lock()) { assert(m_infer_requests_accumulator); - auto scheduled_stream = ScheduledInputStream::create(std::move(low_level_streams), + auto scheduled_stream = ScheduledInputStream::create(m_vdevice, std::move(low_level_streams), edge_layer.value(), m_core_op_handle, m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); @@ -232,7 +241,7 @@ hailo_status VDeviceCoreOp::create_output_vdevice_stream_from_config_params(cons if (m_core_ops_scheduler.lock()) { assert(m_infer_requests_accumulator); - auto scheduled_stream = ScheduledOutputStream::create(std::move(low_level_streams), + auto scheduled_stream = ScheduledOutputStream::create(m_vdevice, std::move(low_level_streams), m_core_op_handle, edge_layer.value(), m_core_op_activated_event, m_infer_requests_accumulator); CHECK_EXPECTED_AS_STATUS(scheduled_stream); diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp index b9c3af62..94828804 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp @@ -15,6 +15,7 @@ #include 
"common/utils.hpp" #include "hailo/network_group.hpp" #include "hailo/vstream.hpp" +#include "hailo/vdevice.hpp" #include "vdevice/scheduler/scheduler.hpp" #include "vdevice/scheduler/infer_request_accumulator.hpp" @@ -31,6 +32,7 @@ class VDeviceCoreOp : public CoreOp { public: static Expected> create( + VDevice &vdevice, ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, @@ -44,7 +46,6 @@ class VDeviceCoreOp : public CoreOp VDeviceCoreOp(const VDeviceCoreOp &other) = delete; VDeviceCoreOp &operator=(const VDeviceCoreOp &other) = delete; VDeviceCoreOp &operator=(VDeviceCoreOp &&other) = delete; - VDeviceCoreOp(VDeviceCoreOp &&other) = default; bool equals(const Hef &hef, const std::pair ¶ms_pair) { @@ -92,7 +93,8 @@ class VDeviceCoreOp : public CoreOp virtual Expected run_hw_infer_estimator() override; virtual Expected get_intermediate_buffer(const IntermediateBufferKey &) override; - VDeviceCoreOp(ActiveCoreOpHolder &active_core_op_holder, + VDeviceCoreOp(VDevice &vdevice, + ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &configure_params, const std::map> &core_ops, CoreOpsSchedulerWeakPtr core_ops_scheduler, scheduler_core_op_handle_t core_op_handle, @@ -111,6 +113,7 @@ class VDeviceCoreOp : public CoreOp hailo_status add_to_trace(); + VDevice &m_vdevice; std::map> m_core_ops; CoreOpsSchedulerWeakPtr m_core_ops_scheduler; const vdevice_core_op_handle_t m_core_op_handle; diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp index 071d7949..5920b90c 100644 --- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp @@ -82,7 +82,7 @@ class VDeviceBase : public VDevice // Currently only homogeneous vDevice is allow (= all devices are from the same type) virtual Expected get_default_streams_interface() const override; - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override { for (const auto &pair : m_devices) { auto &device = pair.second; @@ -92,13 +92,13 @@ class VDeviceBase : public VDevice return HAILO_SUCCESS; } - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override { hailo_status status = HAILO_SUCCESS; for (const auto &pair : m_devices) { auto &device = pair.second; // Best effort, propagate first error - const auto unmap_status = device->dma_unmap(address, direction); + const auto unmap_status = device->dma_unmap(address, size, direction); if (HAILO_SUCCESS != unmap_status) { LOGGER__ERROR("Failed unmapping user buffer {} with status {}", address, unmap_status); if (HAILO_SUCCESS == status) { @@ -113,8 +113,9 @@ class VDeviceBase : public VDevice static hailo_status validate_params(const hailo_vdevice_params_t ¶ms); private: - VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler) : - m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0) + VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler, + const std::string &unique_vdevice_hash="") : + m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0), m_unique_vdevice_hash(unique_vdevice_hash) {} static 
Expected>> create_devices(const hailo_vdevice_params_t &params); @@ -133,6 +134,7 @@ class VDeviceBase std::vector> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context ActiveCoreOpHolder m_active_core_op_holder; vdevice_core_op_handle_t m_next_core_op_handle; + const std::string m_unique_vdevice_hash; // Used to identify this vdevice in the monitor. Consider removing - TODO (HRT-8835) std::mutex m_mutex; }; @@ -161,6 +163,8 @@ class VDeviceClient : public VDevice virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; virtual hailo_status after_fork_in_child() override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; private: VDeviceClient(std::unique_ptr client, VDeviceIdentifier &&identifier, std::vector> &&devices); @@ -201,7 +205,10 @@ class VDeviceHandle : public VDevice Expected>> get_physical_devices() const override; Expected> get_physical_devices_ids() const override; Expected get_default_streams_interface() const override; - Expected> create_infer_model(const std::string &hef_path) override; + Expected> create_infer_model(const std::string &hef_path, + const std::string &network_name = "") override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; private: VDeviceHandle(uint32_t handle); diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp index 39ab8a38..0cfa0b88 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp @@ -22,7 +22,11 @@ Expected> VDeviceNativeInputStream::cr vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { + // Interfaces of all streams should be the same + auto iface = streams.begin()->second.get().get_interface(); + if ((iface != HAILO_STREAM_INTERFACE_ETH) && (iface != HAILO_STREAM_INTERFACE_MIPI)) { + auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size(); + CHECK_EXPECTED(max_queue_size_per_stream); const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -134,7 +138,7 @@ hailo_status VDeviceNativeInputStream::write_impl(const MemoryView &buffer) TRACE(FrameEnqueueH2DTrace, m_core_op_handle, name()); auto status = next_stream().write_impl(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ LOGGER__INFO("Failed write to stream {} (device: {}) with status={}", name(), m_next_transfer_stream, status); return status; } @@ -160,7 +164,9 @@ Expected VDeviceNativeInputStream::get_async_max_queue_size() const // transfers.
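The queue-size computation in get_async_max_queue_size (continued below) floors the per-stream queue to whole batches before scaling. A worked example of that arithmetic; the numbers are hypothetical, and the final multiply-back is an assumption about the part of the function that falls outside this hunk:

#include <cstddef>

constexpr size_t max_queue_per_stream = 10; // hypothetical per-stream queue depth
constexpr size_t batch_size = 4;
constexpr size_t batch_count_queued = max_queue_per_stream / batch_size; // 2 whole batches
constexpr size_t usable_queue = batch_count_queued * batch_size;         // 8 slots (assumed tail logic)
static_assert(batch_count_queued == 2 && usable_queue == 8, "flooring drops the partial batch");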
auto &first_stream = m_streams.begin()->second.get(); const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); - CHECK_EXPECTED(max_queue_per_stream); + if (!max_queue_per_stream) { + return make_unexpected(max_queue_per_stream.status()); // Not all streams have max_queue_size (e.g. eth), so it's not necessarily an error + } if (*max_queue_per_stream >= m_batch_size) { const auto batch_count_queued = *max_queue_per_stream / m_batch_size; @@ -221,7 +227,11 @@ Expected> VDeviceNativeOutputStream:: vdevice_core_op_handle_t core_op_handle) { std::unique_ptr reorder_queue = nullptr; - if (auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size()) { + // Interfaces of all streams should be the same + auto iface = streams.begin()->second.get().get_interface(); + if ((iface != HAILO_STREAM_INTERFACE_ETH) && (iface != HAILO_STREAM_INTERFACE_MIPI)) { + auto max_queue_size_per_stream = streams.begin()->second.get().get_async_max_queue_size(); + CHECK_EXPECTED(max_queue_size_per_stream); const auto max_queue_size = max_queue_size_per_stream.value() * streams.size(); reorder_queue = make_unique_nothrow(max_queue_size); CHECK_NOT_NULL_AS_EXPECTED(reorder_queue, HAILO_OUT_OF_HOST_MEMORY); @@ -317,7 +327,7 @@ hailo_stream_interface_t VDeviceNativeOutputStream::get_interface() const hailo_status VDeviceNativeOutputStream::read_impl(MemoryView buffer) { auto status = next_stream().read_impl(buffer); - if ((HAILO_STREAM_ABORTED_BY_USER == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ + if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)){ LOGGER__INFO("Failed read from stream {} (device: {})", status, m_next_transfer_stream); return status; } @@ -347,7 +357,9 @@ Expected VDeviceNativeOutputStream::get_async_max_queue_size() const // transfers. auto &first_stream = m_streams.begin()->second.get(); const auto max_queue_per_stream = first_stream.get_async_max_queue_size(); - CHECK_EXPECTED(max_queue_per_stream); + if (!max_queue_per_stream) { + return make_unexpected(max_queue_per_stream.status()); // Not all streams have max_queue_size (e.g. 
eth), so it's not necessarily an error + } if (*max_queue_per_stream >= m_batch_size) { const auto batch_count_queued = *max_queue_per_stream / m_batch_size; diff --git a/hailort/libhailort/src/vdma/CMakeLists.txt b/hailort/libhailort/src/vdma/CMakeLists.txt index aed185a3..5641ff67 100644 --- a/hailort/libhailort/src/vdma/CMakeLists.txt +++ b/hailort/libhailort/src/vdma/CMakeLists.txt @@ -1,5 +1,22 @@ cmake_minimum_required(VERSION 3.0.0) +if(WIN32) + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/windows") +elseif(UNIX) + if (CMAKE_SYSTEM_NAME STREQUAL QNX) + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/posix/qnx") + else() + set(DRIVER_OS_DIR "${CMAKE_CURRENT_SOURCE_DIR}/driver/os/posix/linux") + endif() +else() + message(FATAL_ERROR "Unexpected platform target, stopping build") +endif() + +set(DRIVER_SRC_FILES + ${CMAKE_CURRENT_SOURCE_DIR}/driver/hailort_driver.cpp + ${DRIVER_OS_DIR}/driver_os_specific.cpp +) + set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/vdma_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdma_config_core_op.cpp @@ -7,20 +24,30 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/vdma_stream.cpp ${CMAKE_CURRENT_SOURCE_DIR}/circular_stream_buffer_pool.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/dma_mapped_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pcie/pcie_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/integrated/integrated_device.cpp ${CMAKE_CURRENT_SOURCE_DIR}/channel/boundary_channel.cpp ${CMAKE_CURRENT_SOURCE_DIR}/channel/interrupts_dispatcher.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/channel/transfer_launcher.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/descriptor_list.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_edge_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapped_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/dma_able_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/memory/mapping_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_edge_layer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_edge_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/memory/buffer_requirements.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/vdma_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/continuous_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory/sg_buffer.cpp + + ${DRIVER_SRC_FILES} ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) + +# Export DRIVER_SRC_FILES as HAILO_DRIVER_SRC_FILES to parent scope +set(HAILO_DRIVER_SRC_FILES ${DRIVER_SRC_FILES} PARENT_SCOPE) \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp index 32873666..8c331051 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp @@ -12,7 +12,7 @@ #include "common/os_utils.hpp" #include "vdma/channel/boundary_channel.hpp" -#include "vdma/memory/vdma_buffer.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" #include #include @@ -25,31 +25,29 @@ namespace vdma { Expected BoundaryChannel::create(vdma::ChannelId channel_id, Direction direction, - VdmaDevice &vdma_device, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, + HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter) { hailo_status status = HAILO_UNINITIALIZED; - auto channel_ptr = make_shared_nothrow(channel_id, direction, vdma_device, 
descs_count, + auto channel_ptr = make_shared_nothrow(channel_id, direction, driver, descs_count, desc_page_size, stream_name, latency_meter, status); CHECK_NOT_NULL_AS_EXPECTED(channel_ptr, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating BoundaryChannel"); return channel_ptr; } -BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, +BoundaryChannel::BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status) : m_channel_id(channel_id), m_direction(direction), - m_vdma_device(vdma_device), - m_driver(vdma_device.get_driver()), - m_host_registers(vdma_device.get_driver(), channel_id, direction), + m_driver(driver), m_desc_list(nullptr), m_stream_name(stream_name), - m_latency_meter(latency_meter), m_is_channel_activated(false), m_ongoing_transfers((latency_meter != nullptr) ? ONGOING_TRANSFERS_SIZE/2 : ONGOING_TRANSFERS_SIZE), - m_last_bounded_buffer(BoundedBuffer{nullptr, 0, 0}) + m_latency_meter(latency_meter), + m_pending_latency_measurements(ONGOING_TRANSFERS_SIZE) // Make sure there will always be room for latency measurements { if (Direction::BOTH == direction) { LOGGER__ERROR("Boundary channels must be unidirectional"); @@ -91,28 +89,13 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process return HAILO_STREAM_NOT_ACTIVATED; } - // Although the hw_num_processed should be a number between 0 and m_descs.size-1, if m_desc.size < 0x10000 - // (the maximum desc size), the actual hw_num_processed is a number between 1 and m_descs.size. Therefore the - // value can be m_descs.size, in this case we change it to zero. - hw_num_processed = static_cast(hw_num_processed & m_descs.size_mask); - if (m_latency_meter != nullptr) { - // The latency meter gets an updated hw_num_processed via a call to vdma_interrupts_read_timestamps - // (the desc index of the last measured timestamp returned from that ioctl). Since update_latency_meter - // processed m_ongoing_transfers based on this hw_num_processed, and this function (i.e. - // trigger_channel_completion) also processes m_ongoing_transfers based on the value of hw_num_processed, - // we want the two to be the same. Hence, we'll use the more up to date num_processed returned by - // update_latency_meter. - // TODO: fix update_latency_meter flow (HRT-10284) - auto latency_meter_hw_num_processed = update_latency_meter(); - CHECK_EXPECTED_AS_STATUS(latency_meter_hw_num_processed); - hw_num_processed = latency_meter_hw_num_processed.value(); + CHECK_SUCCESS(update_latency_meter()); } while (!m_ongoing_transfers.empty()) { // Reading previous_num_processed inside the loop since on_transfer_complete may increase this value.
const auto previous_num_processed = static_cast(CB_TAIL(m_descs)); - if (!is_transfer_complete(m_ongoing_transfers.front(), previous_num_processed, hw_num_processed)) { break; } @@ -120,19 +103,7 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process auto transfer = std::move(m_ongoing_transfers.front()); m_ongoing_transfers.pop_front(); - hailo_status complete_status = HAILO_SUCCESS; - - #ifndef NDEBUG - assert(!transfer.last_descs.empty()); - auto &last_desc = (*m_desc_list)[transfer.last_descs.back()]; - if (!last_desc.is_done() || last_desc.is_error()) { - LOGGER__ERROR("Error while processing descriptor {} of DMA {} on device {} DESC_STATUS=0x{:x}.", - transfer.last_descs.back(), m_channel_id, m_driver.device_id(), last_desc.status()); - complete_status = HAILO_INTERNAL_FAILURE; - } - #endif - - on_transfer_complete(lock, transfer, complete_status); + on_transfer_complete(lock, transfer, HAILO_SUCCESS); } return HAILO_SUCCESS; @@ -141,7 +112,7 @@ hailo_status BoundaryChannel::trigger_channel_completion(uint16_t hw_num_process CONTROL_PROTOCOL__host_buffer_info_t BoundaryChannel::get_boundary_buffer_info(uint32_t transfer_size) const { // Boundary channels always have scatter gather buffers - return VdmaBuffer::get_host_buffer_info(VdmaBuffer::Type::SCATTER_GATHER, m_desc_list->dma_address(), + return VdmaEdgeLayer::get_host_buffer_info(VdmaEdgeLayer::Type::SCATTER_GATHER, m_desc_list->dma_address(), m_desc_list->desc_page_size(), m_desc_list->count(), transfer_size); } @@ -171,7 +142,7 @@ hailo_status BoundaryChannel::deactivate() return HAILO_SUCCESS; } -hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request, bool user_owns_buffer) +hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request) { std::unique_lock lock(m_channel_mutex); if (!m_is_channel_activated) { @@ -182,58 +153,86 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request return HAILO_QUEUE_IS_FULL; } - auto num_available = get_num_available(); + auto num_available = static_cast(CB_HEAD(m_descs)); const uint16_t first_desc = num_available; - std::vector transfer_last_descs; + uint16_t last_desc = std::numeric_limits::max(); uint16_t total_descs_count = 0; - for (size_t i = 0; i < transfer_request.transfer_buffers.size(); i++) { - auto mapped_buffer_exp = transfer_request.transfer_buffers[i].map_buffer(m_vdma_device, m_direction); - CHECK_EXPECTED_AS_STATUS(mapped_buffer_exp); - auto mapped_buffer = mapped_buffer_exp.release(); - - // Syncing the buffer to device change its ownership from host to the device. - // We sync on D2H as well if the user owns the buffer since the buffer might have been changed by - // the host between the time it was mapped and the current async transfer. If the buffer is not owned by the user, - // it won't be accessed for write. 
- if ((Direction::H2D == m_direction) || user_owns_buffer) { - auto status = transfer_request.transfer_buffers[i].synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_DEVICE); - CHECK_SUCCESS(status); - } - - const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_request.transfer_buffers[i].size()); - CHECK(desired_desc_num <= MAX_DESCS_COUNT, HAILO_INTERNAL_FAILURE); - const uint16_t desc_num = static_cast(desired_desc_num); - assert(total_descs_count + desc_num < MAX_DESCS_COUNT); - total_descs_count = static_cast(total_descs_count + desc_num); + const bool should_bind = !m_bounded_buffer; + if (!should_bind) { + CHECK_SUCCESS(validate_bound_buffer(transfer_request)); + } - const auto last_desc_avail = static_cast((num_available + desc_num - 1) & m_descs.size_mask); + std::vector driver_transfer_buffers; - transfer_last_descs.emplace_back(last_desc_avail); + auto current_num_available = num_available; + for (auto &transfer_buffer : transfer_request.transfer_buffers) { + TRY(auto mapped_buffer, transfer_buffer.map_buffer(m_driver, m_direction)); + driver_transfer_buffers.emplace_back(HailoRTDriver::TransferBuffer{ + mapped_buffer->handle(), + transfer_buffer.offset(), + transfer_buffer.size() + }); - // Raise interrupt on last buffer - const auto should_buffer_raise_int = (i == (transfer_request.transfer_buffers.size() - 1)); - auto status = prepare_descriptors(transfer_request.transfer_buffers[i].size(), num_available, mapped_buffer, - transfer_request.transfer_buffers[i].offset(), should_buffer_raise_int); - CHECK_SUCCESS(status); + const auto desired_desc_num = m_desc_list->descriptors_in_buffer(transfer_buffer.size()); + CHECK(desired_desc_num <= MAX_SG_DESCS_COUNT, HAILO_INTERNAL_FAILURE); + const uint16_t desc_num = static_cast(desired_desc_num); + assert(total_descs_count + desc_num < MAX_SG_DESCS_COUNT); + total_descs_count = static_cast(total_descs_count + desc_num); - num_available = static_cast((last_desc_avail + 1) & m_descs.size_mask); + last_desc = static_cast((current_num_available + desc_num - 1) & m_descs.size_mask); + current_num_available = static_cast((last_desc + 1) & m_descs.size_mask); } + auto first_desc_interrupts = InterruptsDomain::NONE; if ((nullptr != m_latency_meter) && (m_direction == Direction::H2D)) { // If we measure latency, we need an interrupt on the first descriptor for each H2D channel. - m_desc_list->program_single_descriptor((*m_desc_list)[first_desc], m_desc_list->desc_page_size(), - InterruptsDomain::HOST); + first_desc_interrupts = InterruptsDomain::HOST; } + const auto last_desc_interrupts = InterruptsDomain::HOST; - add_ongoing_transfer(std::move(transfer_request), first_desc, std::move(transfer_last_descs)); + int num_processed = CB_TAIL(m_descs); + int num_free = CB_AVAIL(m_descs, num_available, num_processed); + if (total_descs_count > num_free) { + return HAILO_OUT_OF_DESCRIPTORS; + } - auto status = inc_num_available(total_descs_count); - CHECK_SUCCESS(status); + m_ongoing_transfers.push_back(OngoingTransfer{std::move(transfer_request), last_desc}); + if (m_latency_meter) { + assert(!m_pending_latency_measurements.full()); + m_pending_latency_measurements.push_back(m_direction == Direction::H2D ? 
first_desc : last_desc); + } + CB_ENQUEUE(m_descs, total_descs_count); + + TRY(const auto desc_programmed, m_driver.launch_transfer( + m_channel_id, + m_desc_list->handle(), + num_available, + driver_transfer_buffers, + should_bind, + first_desc_interrupts, + last_desc_interrupts + )); + CHECK(total_descs_count == desc_programmed, HAILO_INTERNAL_FAILURE, + "Inconsistent descs programmed, expecting {} got {}", total_descs_count, desc_programmed); return HAILO_SUCCESS; } +hailo_status BoundaryChannel::bind_buffer(MappedBufferPtr buffer) +{ + CHECK(m_bounded_buffer == nullptr, HAILO_INTERNAL_FAILURE, + "Buffer is already bound to channel {}", m_channel_id); + const auto expected_size = static_cast(m_desc_list->desc_page_size()) * m_desc_list->count(); + CHECK(buffer->size() == expected_size, HAILO_INVALID_ARGUMENT, + "Buffer size {} does not fit in desc list - descs count {} desc page size {}", buffer->size(), + m_desc_list->count(), m_desc_list->desc_page_size()); + static const size_t DEFAULT_BUFFER_OFFSET = 0; + CHECK_SUCCESS(m_desc_list->configure_to_use_buffer(*buffer, buffer->size(), DEFAULT_BUFFER_OFFSET, m_channel_id)); + m_bounded_buffer = buffer; + return HAILO_SUCCESS; +} + void BoundaryChannel::cancel_pending_transfers() { std::unique_lock lock(m_channel_mutex); @@ -241,7 +240,7 @@ void BoundaryChannel::cancel_pending_transfers() auto transfer = std::move(m_ongoing_transfers.front()); m_ongoing_transfers.pop_front(); - on_transfer_complete(lock, transfer, HAILO_STREAM_ABORTED_BY_USER); + on_transfer_complete(lock, transfer, HAILO_STREAM_ABORT); } } @@ -255,16 +254,12 @@ size_t BoundaryChannel::get_max_ongoing_transfers(size_t transfer_size) const return std::min(max_transfers_in_buffer, m_ongoing_transfers.capacity()); } -Expected BoundaryChannel::update_latency_meter() +hailo_status BoundaryChannel::update_latency_meter() { - uint16_t last_num_processed = m_last_timestamp_num_processed; - - auto timestamp_list = m_driver.vdma_interrupts_read_timestamps(m_channel_id); - CHECK_EXPECTED(timestamp_list); - - if (0 == timestamp_list->count) { - // No new timestamps for this channel, return the previous result - return Expected(last_num_processed); + TRY(auto timestamp_list, m_driver.vdma_interrupts_read_timestamps(m_channel_id)); + if (0 == timestamp_list.count) { + // No new timestamps for this channel. + return HAILO_SUCCESS; } // TODO: now we have more iterations than we need. We know that the pending buffers + the timestamp list
// not in those timestamps - for (const auto &transfer : m_ongoing_transfers) { - uint16_t latency_desc = static_cast(transfer.latency_measure_desc); - for (size_t i = 0; i < timestamp_list->count; i++) { - const auto &irq_timestamp = timestamp_list->timestamp_list[i]; - const auto desc_num_processed = static_cast(irq_timestamp.desc_num_processed & m_descs.size_mask); - if (is_desc_between(last_num_processed, desc_num_processed, latency_desc)) { - if (m_direction == Direction::H2D) { - m_latency_meter->add_start_sample(irq_timestamp.timestamp); - } - else { - m_latency_meter->add_end_sample(m_stream_name, irq_timestamp.timestamp); - } - break; + auto find_timestamp = [&](uint16_t latency_desc) -> Expected { + for (size_t i = 0; i < timestamp_list.count; i++) { + const auto &irq_timestamp = timestamp_list.timestamp_list[i]; + if (is_desc_between(m_last_timestamp_num_processed, irq_timestamp.desc_num_processed, latency_desc)) { + return std::chrono::nanoseconds{irq_timestamp.timestamp}; } } + return make_unexpected(HAILO_NOT_FOUND); + }; + + while (!m_pending_latency_measurements.empty()) { + auto timestamp = find_timestamp(m_pending_latency_measurements.front()); + if (!timestamp) { + break; + } + + if (m_direction == Direction::H2D) { + m_latency_meter->add_start_sample(*timestamp); + } else { + m_latency_meter->add_end_sample(m_stream_name, *timestamp); + } + m_pending_latency_measurements.pop_front(); } - m_last_timestamp_num_processed = static_cast( - timestamp_list->timestamp_list[timestamp_list->count-1].desc_num_processed & m_descs.size_mask); - return Expected(m_last_timestamp_num_processed); + m_last_timestamp_num_processed = timestamp_list.timestamp_list[timestamp_list.count-1].desc_num_processed; + return HAILO_SUCCESS; } bool BoundaryChannel::is_transfer_complete(const OngoingTransfer &transfer, uint16_t previous_num_processed, uint16_t current_num_processed) const { // Transfer is complete if its last descriptor is in [previous_num_processed, current_num_processed) or - // the the buffer is empty (previous_num_processed == get_num_available()) - assert(!transfer.last_descs.empty()); - return is_desc_between(previous_num_processed, current_num_processed, transfer.last_descs.back()) || - (current_num_processed == get_num_available()); + // the the buffer is empty (previous_num_processed == CB_HEAD(m_descs)) + return is_desc_between(previous_num_processed, current_num_processed, transfer.last_desc) || + (current_num_processed == CB_HEAD(m_descs)); } void BoundaryChannel::on_transfer_complete(std::unique_lock &lock, OngoingTransfer &transfer, hailo_status complete_status) { - // Clear relevant descriptors from previous transfer - if (nullptr != m_latency_meter) { - m_desc_list->clear_descriptor(transfer.latency_measure_desc); - } - - assert(!transfer.last_descs.empty()); - for (const auto& last_desc : transfer.last_descs) { - m_desc_list->clear_descriptor(last_desc); - } - // We increase desc num_proc (can happen only in this flow). After it is increased - // 1. On D2H channels - the output can be read by the user. // 2. On H2D channels - new input can be written to the buffer. - _CB_SET(m_descs.tail, (transfer.last_descs.back() + 1) & m_descs.size_mask); + _CB_SET(m_descs.tail, (transfer.last_desc + 1) & m_descs.size_mask); // Finally, we notify user callbacks registered with the transfer. // We want to make sure that the callbacks are called after the descriptors can be reused (So the user will // be able to start new transfer). 
lock.unlock(); - - if (Direction::D2H == m_direction) { - for (auto& transfer_buffer : transfer.request.transfer_buffers) { - auto sync_status = transfer_buffer.synchronize(m_vdma_device, HailoRTDriver::DmaSyncDirection::TO_HOST); - if (HAILO_SUCCESS != sync_status) { - LOGGER__ERROR("Failed to sync buffer for output channel {} device {}", m_channel_id, m_driver.device_id()); - if (HAILO_SUCCESS != complete_status) { - complete_status = sync_status; - } - } - } - } - transfer.request.callback(complete_status); lock.lock(); } -hailo_status BoundaryChannel::prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt) -{ - if (mapped_buffer != nullptr) { - CHECK((buffer_offset % m_desc_list->desc_page_size()) == 0, HAILO_INTERNAL_FAILURE, - "Buffer offset {} must be desc page size aligned {}", buffer_offset, m_desc_list->desc_page_size()); - const size_t buffer_offset_in_descs = buffer_offset / m_desc_list->desc_page_size(); - if (!is_buffer_already_configured(mapped_buffer, buffer_offset_in_descs, starting_desc)) { - // We need to configure the buffer now. - - // First, store information on the buffer. - m_last_bounded_buffer.buffer = mapped_buffer; - m_last_bounded_buffer.starting_desc = starting_desc; - m_last_bounded_buffer.buffer_offset_in_descs = static_cast(buffer_offset_in_descs); - - // Now we want that m_desc_list[starting_desc] will be mapped into mapped_buffer[buffer_offset]. - // The descriptors list configure always starts from buffer_offset=0, so in order to achieve our - // configuration, we configure the buffer starting from desc=(starting_desc - buffer_offset_in_desc). - // Then, after configuring buffer_offset bytes from the buffer, the desc_index will be starting desc. - const int desc_diff = static_cast(starting_desc) - static_cast(buffer_offset_in_descs); - const auto configure_starting_desc = static_cast(m_descs.size + desc_diff) % m_descs.size; - - // Finally do the actual configuration. - auto status = m_desc_list->configure_to_use_buffer(*mapped_buffer, m_channel_id, configure_starting_desc); - CHECK_SUCCESS(status); - } - } - - auto last_desc_interrupts_domain = raise_interrupt ? InterruptsDomain::HOST : InterruptsDomain::NONE; - // TODO: HRT-11188 - fix starting_desc parameter - auto actual_desc_count = m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, - starting_desc); - CHECK_EXPECTED_AS_STATUS(actual_desc_count, "Failed to program desc_list for channel {}", m_channel_id); - - return HAILO_SUCCESS; -} - -bool BoundaryChannel::is_buffer_already_configured(MappedBufferPtr buffer, size_t buffer_offset_in_descs, - size_t starting_desc) const -{ - if (m_last_bounded_buffer.buffer != buffer) { - // Last buffer is nullptr or not the same as the given. - return false; - } - - // If the diff between starting_desc and m_last_bounded_buffer.starting_desc and the diff between - // buffer_offset_in_descs - m_last_bounded_buffer.buffer_offset_in_descs are equal, it means that the buffer is - // already configured. - // Note that we don't afraid of overflow since buffer_offset_in_descs * desc_page_size() must fit inside the buffer. 
- const auto starting_desc_diff = (starting_desc - m_last_bounded_buffer.starting_desc) % m_descs.size; - const auto buffer_offset_diff_in_descs = (buffer_offset_in_descs - m_last_bounded_buffer.buffer_offset_in_descs) % m_descs.size; - return starting_desc_diff == buffer_offset_diff_in_descs; -} - -void BoundaryChannel::add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, - std::vector &&last_descs) -{ - OngoingTransfer transfer{}; - transfer.request = std::move(transfer_request); - transfer.last_descs = std::move(last_descs); - transfer.latency_measure_desc = (m_direction == HailoRTDriver::DmaDirection::H2D) ? first_desc : - transfer.last_descs.back(); - m_ongoing_transfers.push_back(std::move(transfer)); -} - -hailo_status BoundaryChannel::inc_num_available(uint16_t value) -{ - int num_available = get_num_available(); - int num_processed = CB_TAIL(m_descs); - int num_free = CB_AVAIL(m_descs, num_available, num_processed); - if (value > num_free) { - return HAILO_OUT_OF_DESCRIPTORS; - } - - CB_ENQUEUE(m_descs, value); - num_available = (num_available + value) & m_descs.size_mask; - - return m_host_registers.set_num_available(static_cast(num_available)); -} - bool BoundaryChannel::is_desc_between(uint16_t begin, uint16_t end, uint16_t desc) { if (begin == end) { @@ -439,26 +336,6 @@ bool BoundaryChannel::is_desc_between(uint16_t begin, uint16_t end, uint16_t des } } -uint16_t BoundaryChannel::get_num_available() const -{ - uint16_t num_available = (uint16_t)CB_HEAD(m_descs); - -#ifndef NDEBUG - // Validate synchronization with HW - auto hw_num_avail = m_host_registers.get_num_available(); - assert(hw_num_avail); - - // On case of channel aborted, the num_available is set to 0 (so we don't accept sync) - auto is_aborted_exp = m_host_registers.is_aborted(); - assert(is_aborted_exp); - - if (!is_aborted_exp.value()) { - assert(hw_num_avail.value() == num_available); - } -#endif - return num_available; -} - hailo_status BoundaryChannel::allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size) { static const bool CIRCULAR = true; @@ -471,5 +348,23 @@ hailo_status BoundaryChannel::allocate_descriptor_list(uint32_t descs_count, uin return HAILO_SUCCESS; } +hailo_status BoundaryChannel::validate_bound_buffer(TransferRequest &transfer_request) +{ + assert(m_bounded_buffer); + CHECK(transfer_request.transfer_buffers.size() == 1, HAILO_INTERNAL_FAILURE, + "When bound buffer is used, transfer request must contain only one buffer"); + + auto &transfer_buffer = transfer_request.transfer_buffers[0]; + const auto num_available = CB_HEAD(m_descs); + const auto expected_offset = static_cast(m_desc_list->desc_page_size()) * num_available; + CHECK(transfer_buffer.offset() == expected_offset, HAILO_INTERNAL_FAILURE, + "Unexpected buffer offset, expected {} actual {}", expected_offset, transfer_buffer.offset()); + CHECK(transfer_buffer.base_buffer().data() == reinterpret_cast(m_bounded_buffer->user_address()), HAILO_INTERNAL_FAILURE, + "Got the wrong buffer"); + CHECK(transfer_buffer.base_buffer().size() == m_bounded_buffer->size(), HAILO_INTERNAL_FAILURE, + "Got invalid buffer size {}, expected {}", transfer_buffer.base_buffer().size(), m_bounded_buffer->size()); + return HAILO_SUCCESS; +} + } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp index 38b7d026..8b136138 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp +++ 
b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp @@ -10,8 +10,6 @@ #ifndef _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ #define _HAILO_VDMA_BOUNDARY_CHANNEL_HPP_ -#include "vdma/vdma_device.hpp" -#include "vdma/channel/vdma_channel_regs.hpp" #include "vdma/channel/channel_id.hpp" #include "vdma/memory/descriptor_list.hpp" #include "stream_common/transfer_common.hpp" @@ -28,8 +26,7 @@ namespace vdma { struct OngoingTransfer { TransferRequest request; - std::vector last_descs; - uint16_t latency_measure_desc; + uint16_t last_desc; }; class BoundaryChannel; @@ -39,10 +36,10 @@ class BoundaryChannel final public: using Direction = HailoRTDriver::DmaDirection; - static Expected create(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, + static Expected create(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr); - BoundaryChannel(vdma::ChannelId channel_id, Direction direction, VdmaDevice &vdma_device, uint32_t descs_count, + BoundaryChannel(vdma::ChannelId channel_id, Direction direction, HailoRTDriver &driver, uint32_t descs_count, uint16_t desc_page_size, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status); BoundaryChannel(const BoundaryChannel &other) = delete; @@ -60,13 +57,16 @@ class BoundaryChannel final hailo_status trigger_channel_completion(uint16_t hw_num_processed); // Calls all pending transfer callbacks (if they exist), marking them as canceled by passing - // HAILO_STREAM_ABORTED_BY_USER as a status to the callbacks. + // HAILO_STREAM_ABORT as a status to the callbacks. // Note: This function is to be called on a deactivated channel object. Calling on an active channel will lead to // unexpected results void cancel_pending_transfers(); - // user_owns_buffer is set when the buffer is owned by the user (otherwise we may have some assumtions). - hailo_status launch_transfer(TransferRequest &&transfer_request, bool user_owns_buffer); + hailo_status launch_transfer(TransferRequest &&transfer_request); + + // To avoid buffer bindings, one can call this function to statically bind a full buffer to the channel. The buffer + // size should be exactly desc_page_size() * descs_count() of current descriptors list. 
+ hailo_status bind_buffer(MappedBufferPtr buffer); size_t get_max_ongoing_transfers(size_t transfer_size) const; @@ -88,63 +88,49 @@ class BoundaryChannel final } private: - static void empty_transfer_done_callback(hailo_status){} - // Returns the desc index of the last desc whose timestamp was measured in the driver - Expected update_latency_meter(); + hailo_status update_latency_meter(); bool is_transfer_complete(const OngoingTransfer &transfer, uint16_t previous_num_processed, uint16_t current_num_processed) const; void on_transfer_complete(std::unique_lock &lock, OngoingTransfer &transfer, hailo_status complete_status); - hailo_status prepare_descriptors(size_t transfer_size, uint16_t starting_desc, - MappedBufferPtr mapped_buffer, size_t buffer_offset, bool raise_interrupt = true); - - bool is_buffer_already_configured(MappedBufferPtr buffer, size_t buffer_offset_in_descs, size_t starting_desc) const; - void add_ongoing_transfer(TransferRequest &&transfer_request, uint16_t first_desc, - std::vector &&last_descs); static bool is_desc_between(uint16_t begin, uint16_t end, uint16_t desc); - uint16_t get_num_available() const; - hailo_status inc_num_available(uint16_t value); hailo_status allocate_descriptor_list(uint32_t descs_count, uint16_t desc_page_size); + hailo_status validate_bound_buffer(TransferRequest &transfer_request); const vdma::ChannelId m_channel_id; const Direction m_direction; - VdmaDevice &m_vdma_device; HailoRTDriver &m_driver; - VdmaChannelRegs m_host_registers; std::shared_ptr m_desc_list; // Host side descriptor list const std::string m_stream_name; circbuf_t m_descs; - LatencyMeterPtr m_latency_meter; bool m_is_channel_activated; std::mutex m_channel_mutex; CircularArray m_ongoing_transfers; - // Contains the last num_processed of the last interrupt (only used on latency measurement) + // About HW latency measurements: + // - For each ongoing transfer, we push some num-proc value to the pending_latency_measurements array. When this + // descriptor is processed, we can add a sample to the latency meter. + // - On H2D, the descriptor is the first descriptor on each transfer, so we start the measurement after the first + // vdma descriptor is processed. We don't measure on launch_transfer since the hw may be busy processing + // requests. When the first descriptor is processed, we can be sure the hw has really started processing the + // frame. + // - On D2H, the descriptor is the last descriptor on each transfer, so we end the measurement after the transfer is + // processed. + // - To get the timestamp, the read_timestamps ioctl is called. This ioctl returns pairs of num-processed and + // their interrupt timestamps; then, using m_last_timestamp_num_processed, we can check if some + // pending_latency_measurement is done. + // - We don't use m_ongoing_transfers to store the latency measurements because to finish an ongoing transfer + // we use the hw num processed given by trigger_channel_completion, which may be different than the hw num processed + // returned from read_timestamps_ioctl (one is measured in the ioctl and the other is measured in the interrupt). + LatencyMeterPtr m_latency_meter; + CircularArray m_pending_latency_measurements; uint16_t m_last_timestamp_num_processed; - struct BoundedBuffer { - MappedBufferPtr buffer; - - // The buffer is bounded starting from this descriptor. - uint16_t starting_desc; - - // Offset inside the buffer (in desc_page_size granularity) of the "actual start" of the buffer.
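Concretely, the pairing described in the comment above can be modeled as: keep a FIFO of one marker descriptor per transfer, and whenever the driver returns new (num-processed, timestamp) pairs, pop every marker covered by them, oldest first. An illustrative model, not the library code (the real flow is update_latency_meter in the .cpp above):

#include <chrono>
#include <cstdint>
#include <deque>
#include <utility>
#include <vector>

struct IrqTimestamp {
    uint16_t desc_num_processed;
    std::chrono::nanoseconds timestamp;
};

// Circular half-open membership, as in is_desc_between().
static bool covered(uint16_t begin, uint16_t end, uint16_t desc)
{
    if (begin == end) return false;
    return (begin < end) ? (begin <= desc && desc < end) : (desc >= begin || desc < end);
}

// Pops every pending marker covered by the new timestamps, oldest first, and
// returns the matched (marker, timestamp) samples in order.
static std::vector<std::pair<uint16_t, std::chrono::nanoseconds>> match_markers(
    uint16_t &last_num_processed, std::deque<uint16_t> &pending_markers,
    const std::vector<IrqTimestamp> &timestamps)
{
    std::vector<std::pair<uint16_t, std::chrono::nanoseconds>> samples;
    while (!pending_markers.empty()) {
        bool matched = false;
        for (const auto &irq : timestamps) {
            if (covered(last_num_processed, irq.desc_num_processed, pending_markers.front())) {
                samples.emplace_back(pending_markers.front(), irq.timestamp);
                pending_markers.pop_front();
                matched = true;
                break;
            }
        }
        if (!matched) break;  // the oldest marker isn't reached yet, so newer ones can't be either
    }
    if (!timestamps.empty()) {
        last_num_processed = timestamps.back().desc_num_processed;
    }
    return samples;
}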
-    struct BoundedBuffer {
-        MappedBufferPtr buffer;
-
-        // The buffer is bounded starting from this descriptor.
-        uint16_t starting_desc;
-
-        // Offset inside the buffer (in desc_page_size granularity) of the "actual start" of the buffer.
-        // It implies that:
-        //      desc_list[starting_desc] will point to buffer[buffers_desc_offset * desc_page_size].
-        uint16_t buffer_offset_in_descs;
-    };
-
-    // We store the last bounded buffer as cache in order to avoid unnecessary descriptors list reprogramming.
-    // It is good enough to store only the last bounded buffer because we have two modes of execution:
-    //   1. User allocated buffers - On each transfer we bind new buffer. Even if the user always uses the same
-    //      buffers, due to the circular nature of descriptor list, reprogramming will almost always be needed (So
-    //      cacheing won't help).
-    //   2. Single circular buffer (internally) - In this case we don't need to bind each time (maybe after the
-    //      channel is re-activated). Caching the last bounded buffer is enough.
-    BoundedBuffer m_last_bounded_buffer;
+    // When bind_buffer is called, we keep a reference to the buffer here, to avoid re-binding it on every transfer.
+    std::shared_ptr<MappedBuffer> m_bounded_buffer;
 };
 
 } /* namespace vdma */
 
diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
index b039e41e..50b0c49d 100644
--- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
+++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp
@@ -10,7 +10,7 @@
 #ifndef _HAILO_VDMA_INTERRUPTS_DISPATCHER_HPP_
 #define _HAILO_VDMA_INTERRUPTS_DISPATCHER_HPP_
 
-#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp"
 #include
 #include
 #include
diff --git a/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp
new file mode 100644
index 00000000..96c5c93e
--- /dev/null
+++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp
@@ -0,0 +1,128 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file transfer_launcher.cpp
+ * @brief Manages a thread that launches non-bound async vdma read/writes
+ **/
+
+#include "transfer_launcher.hpp"
+#include "common/utils.hpp"
+#include "common/os_utils.hpp"
+
+namespace hailort {
+namespace vdma {
+
+Expected<std::unique_ptr<TransferLauncher>> TransferLauncher::create()
+{
+    auto thread = make_unique_nothrow<TransferLauncher>();
+    CHECK_NOT_NULL_AS_EXPECTED(thread, HAILO_OUT_OF_HOST_MEMORY);
+    return thread;
+}
+
+TransferLauncher::TransferLauncher() :
+    m_mutex(),
+    m_cond(),
+    m_queue(),
+    m_should_quit(false),
+    m_thread_active(false),
+    m_worker_thread([this] { worker_thread(); })
+{}
+
+TransferLauncher::~TransferLauncher()
+{
+    const auto status = stop();
+    if (status != HAILO_SUCCESS) {
+        LOGGER__ERROR("Failed stopping transfer launcher thread in destructor");
+    }
+
+    if (m_worker_thread.joinable()) {
+        signal_thread_quit();
+        m_worker_thread.join();
+    }
+}
+
+hailo_status TransferLauncher::enqueue_transfer(Transfer &&transfer)
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_queue.emplace(std::move(transfer));
+    }
+
+    m_cond.notify_one();
+    return HAILO_SUCCESS;
+}
+
+hailo_status TransferLauncher::start()
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        CHECK(!m_thread_active, HAILO_INVALID_OPERATION, "Transfer launcher thread already running");
+
+        m_thread_active = true;
+    }
+    m_cond.notify_one();
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status TransferLauncher::stop()
+{
+    std::unique_lock<std::mutex> lock(m_mutex);
+
+    if (!m_thread_active) {
+        // Already stopped
+        return HAILO_SUCCESS;
+    }
+
+    m_thread_active = false;
+
+    while (!m_queue.empty()) {
+        m_queue.pop();
+        // TODO: need to call the callbacks to signal that they were aborted? (HRT-13110)
+        // like this:
+        //     auto transfer_request = m_queue.front();
+        //     m_queue.pop();
+        //     transfer_request.callback(HAILO_STREAM_ABORT);
+        // or can it be done in BoundaryChannel::cancel_pending_transfers?
+    }
+
+    // TODO: Keep stop flow used in interrupt thread? (HRT-13110)
+    // E.g. look for comment "The wait is needed because otherwise, on a fast stop()..."
+
+    return HAILO_SUCCESS;
+}
+
+void TransferLauncher::worker_thread()
+{
+    OsUtils::set_current_thread_name("TRANSFR_LNCH");
+
+    while (true) {
+        Transfer transfer;
+        {
+            std::unique_lock<std::mutex> lock(m_mutex);
+            m_cond.wait(lock, [this] { return m_should_quit || (!m_queue.empty() && m_thread_active); });
+            if (m_should_quit) {
+                return;
+            }
+
+            // There's work to do
+            transfer = std::move(m_queue.front());
+            m_queue.pop();
+        }
+        transfer();
+    }
+}
+
+void TransferLauncher::signal_thread_quit()
+{
+    {
+        std::lock_guard<std::mutex> lock(m_mutex);
+        m_should_quit = true;
+    }
+    m_cond.notify_all();
+}
+
+} /* namespace vdma */
+} /* namespace hailort */
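The launcher above is deliberately small: callers hand it opaque callables and a single worker thread drains them in order. A short usage sketch (the lambda body is illustrative only):

```cpp
// Sketch: typical lifecycle of a TransferLauncher.
auto launcher = TransferLauncher::create().release();
auto status = launcher->start();
assert(HAILO_SUCCESS == status);

// Each transfer is an opaque callable; in practice it would bind a buffer and
// launch a vdma transfer off the caller's thread.
status = launcher->enqueue_transfer([]() {
    // launch_transfer(...) would run here.
});
assert(HAILO_SUCCESS == status);

// stop() drains the queue without executing the remaining transfers (see the TODO in
// stop()); the worker thread itself is only joined in the destructor.
status = launcher->stop();
assert(HAILO_SUCCESS == status);
```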
diff --git a/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp
new file mode 100644
index 00000000..87136848
--- /dev/null
+++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp
@@ -0,0 +1,62 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file transfer_launcher.hpp
+ * @brief Manages a thread that launches non-bound async vdma read/writes
+ **/
+
+#ifndef _HAILO_TRANSFER_LAUNCHER_HPP_
+#define _HAILO_TRANSFER_LAUNCHER_HPP_
+
+#include "hailo/hailort.h"
+#include "hailo/expected.hpp"
+
+#include <condition_variable>
+#include <functional>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <thread>
+
+namespace hailort {
+namespace vdma {
+
+class TransferLauncher final
+{
+public:
+    // TODO: fix this to be a proper transfer object (HRT-13110)
+    using Transfer = std::function<void()>;
+
+    static Expected<std::unique_ptr<TransferLauncher>> create();
+    TransferLauncher();
+    ~TransferLauncher();
+
+    TransferLauncher(TransferLauncher &&) = delete;
+    TransferLauncher(const TransferLauncher &) = delete;
+    TransferLauncher &operator=(TransferLauncher &&) = delete;
+    TransferLauncher &operator=(const TransferLauncher &) = delete;
+
+    hailo_status enqueue_transfer(Transfer &&transfer);
+    hailo_status start();
+    hailo_status stop();
+
+private:
+    void worker_thread();
+    void signal_thread_quit();
+
+    std::mutex m_mutex;
+    std::condition_variable m_cond;
+    // TODO: use SpscQueue (HRT-10554)
+    std::queue<Transfer> m_queue;
+    // m_should_quit is used to quit the thread (called on destruction)
+    bool m_should_quit;
+    bool m_thread_active;
+    std::thread m_worker_thread;
+};
+
+} /* namespace vdma */
+} /* namespace hailort */
+
+#endif /* _HAILO_TRANSFER_LAUNCHER_HPP_ */
diff --git a/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp b/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp
deleted file mode 100644
index 22990484..00000000
--- a/hailort/libhailort/src/vdma/channel/vdma_channel_regs.hpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/**
- * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file vdma_channel_regs.hpp - * @brief utilties used to parse/modify PLDA Vdma channel registers - **/ - -#ifndef _HAILO_VDMA_CHANNEL__REGS_HPP_ -#define _HAILO_VDMA_CHANNEL__REGS_HPP_ - -#include "hw_consts.hpp" -#include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" - -#include - -namespace hailort -{ - -#define DESCPRIPTOR_LIST_MAX_DEPTH (16) - - -inline bool vdma_channel_control_is_aborted(uint8_t control_reg) -{ - return (control_reg & 1) == 0; -} - - -class VdmaChannelRegs final { -public: - VdmaChannelRegs(HailoRTDriver &driver, vdma::ChannelId channel_id, HailoRTDriver::DmaDirection direction) : - m_driver(driver), - m_channel_id(channel_id), - m_direction(direction) - {} - - Expected get_num_available() const - { - return read_integer(VDMA_CHANNEL_NUM_AVAIL_OFFSET); - } - - hailo_status set_num_available(uint16_t value) - { - return write_integer(VDMA_CHANNEL_NUM_AVAIL_OFFSET, value); - } - -#ifndef NDEBUG - Expected is_aborted() const - { - const auto control_reg = read_integer(VDMA_CHANNEL_CONTROL_OFFSET); - CHECK_EXPECTED(control_reg); - return vdma_channel_control_is_aborted(*control_reg); - } -#endif /* NDEBUG */ - -private: - - template - Expected read_integer(uint32_t offset) const - { - auto value = m_driver.read_vdma_channel_register(m_channel_id, m_direction, offset, sizeof(IntegerType)); - CHECK_EXPECTED(value); - return static_cast(value.release()); - } - - template - hailo_status write_integer(uint32_t offset, IntegerType value) - { - return m_driver.write_vdma_channel_register(m_channel_id, m_direction, offset, sizeof(value), value); - } - - HailoRTDriver &m_driver; - const vdma::ChannelId m_channel_id; - const HailoRTDriver::DmaDirection m_direction; -}; - -} /* namespace hailort */ - -#endif /*_HAILO_VDMA_CHANNEL__REGS_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp index f6d8e4b1..6be62879 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp @@ -8,6 +8,7 @@ #include "circular_stream_buffer_pool.hpp" #include "vdma/memory/descriptor_list.hpp" +#include "utils/buffer_storage.hpp" #include "utils.h" @@ -15,36 +16,37 @@ namespace hailort { Expected> CircularStreamBufferPool::create(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size) + hailo_dma_buffer_direction_t direction, size_t desc_page_size, size_t descs_count, size_t transfer_size) { // TODO: HRT-11220 calculate desc_count/desc_page_size base on transfer_size and queue_size - CHECK_AS_EXPECTED(is_powerof2(descs_count), HAILO_INTERNAL_FAILURE, "descs_count {} must be power of 2", descs_count); - CHECK_AS_EXPECTED(is_powerof2(desc_page_size), HAILO_INTERNAL_FAILURE, "desc_page_size {} must be power of 2", + CHECK(is_powerof2(descs_count), HAILO_INTERNAL_FAILURE, "descs_count {} must be power of 2", descs_count); + CHECK(is_powerof2(desc_page_size), HAILO_INTERNAL_FAILURE, "desc_page_size {} must be power of 2", desc_page_size); const auto buffer_size = desc_page_size * descs_count; - CHECK_AS_EXPECTED(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}", + CHECK(transfer_size < buffer_size, HAILO_INTERNAL_FAILURE, "Transfer size {} must be smaller than buffer size {}", transfer_size, 
buffer_size); - auto mapped_buffer = allocate_buffer(device, direction, buffer_size); - CHECK_EXPECTED(mapped_buffer); + TRY(auto base_buffer, allocate_buffer(device, buffer_size)); + TRY(auto mapping, DmaMappedBuffer::create(device, base_buffer.data(), base_buffer.size(), direction)); auto circular_buffer_pool = make_unique_nothrow(desc_page_size, descs_count, - transfer_size, mapped_buffer.release()); - CHECK_NOT_NULL_AS_EXPECTED(circular_buffer_pool, HAILO_OUT_OF_HOST_MEMORY); + transfer_size, std::move(base_buffer), std::move(mapping)); + CHECK_NOT_NULL(circular_buffer_pool, HAILO_OUT_OF_HOST_MEMORY); return circular_buffer_pool; } CircularStreamBufferPool::CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size, - BufferPtr &&mapped_buffer) : + Buffer &&base_buffer, DmaMappedBuffer &&mappings) : m_desc_page_size(desc_page_size), m_transfer_size(transfer_size), - m_mapped_buffer(std::move(mapped_buffer)), + m_base_buffer(std::move(base_buffer)), + m_mappings(std::move(mappings)), m_next_enqueue_desc_offset(0) { assert(is_powerof2(descs_count) && (descs_count > 0)); - assert(m_mapped_buffer->size() == (m_desc_page_size * descs_count)); + assert(m_base_buffer.size() == (m_desc_page_size * descs_count)); CB_INIT(m_queue, descs_count); m_queue.head = static_cast(descs_count - 1); } @@ -67,7 +69,7 @@ Expected CircularStreamBufferPool::dequeue() const size_t offset_in_buffer = CB_TAIL(m_queue) * m_desc_page_size; CB_DEQUEUE(m_queue, descs_in_transfer()); return TransferBuffer { - m_mapped_buffer, + MemoryView(m_base_buffer), m_transfer_size, offset_in_buffer }; @@ -78,7 +80,7 @@ hailo_status CircularStreamBufferPool::enqueue(TransferBuffer &&buffer_info) const size_t descs_required = descs_in_transfer(); const size_t descs_available = CB_AVAIL(m_queue, CB_HEAD(m_queue), CB_TAIL(m_queue)); CHECK(descs_available >= descs_required, HAILO_INTERNAL_FAILURE, "Can enqueue without previous dequeue"); - CHECK(buffer_info.base_buffer() == m_mapped_buffer, HAILO_INTERNAL_FAILURE, "Got the wrong buffer"); + CHECK(buffer_info.base_buffer().data() == m_base_buffer.data(), HAILO_INTERNAL_FAILURE, "Got the wrong buffer"); CHECK(buffer_info.size() == m_transfer_size, HAILO_INTERNAL_FAILURE, "Got invalid buffer size {}, expected {}", buffer_info.size(), m_transfer_size); @@ -99,24 +101,14 @@ void CircularStreamBufferPool::reset_pointers() m_next_enqueue_desc_offset = 0; } -Expected CircularStreamBufferPool::allocate_buffer(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t size) +Expected CircularStreamBufferPool::allocate_buffer(VdmaDevice &device, size_t size) { - auto dma_able_buffer = vdma::DmaAbleBuffer::create_by_allocation(size, device.get_driver()); - CHECK_EXPECTED(dma_able_buffer); + TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(size, device.get_driver())); - auto dma_storage = make_shared_nothrow(dma_able_buffer.release()); + auto dma_storage = make_shared_nothrow(std::move(dma_able_buffer)); CHECK_NOT_NULL_AS_EXPECTED(dma_storage, HAILO_OUT_OF_HOST_MEMORY); - // TODO HRT-11595: We map the buffer here to avoid mapping buffer during descriptors list creation (it cause - // deadlock on the linux driver). After HRT-11595, we won't need to call dma_map. 
- auto map_result = dma_storage->dma_map(device, to_hailo_dma_direction(direction)); - CHECK_EXPECTED(map_result); - - auto mapped_buffer = make_shared_nothrow(std::move(dma_storage)); - CHECK_NOT_NULL_AS_EXPECTED(mapped_buffer, HAILO_OUT_OF_HOST_MEMORY); - - return mapped_buffer; + return Buffer::create(dma_storage); } size_t CircularStreamBufferPool::descs_in_transfer() const diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp index 04fd9028..4fd87653 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp @@ -14,6 +14,7 @@ #include "common/circular_buffer.hpp" #include "stream_common/stream_buffer_pool.hpp" #include "vdma/vdma_device.hpp" +#include "hailo/dma_mapped_buffer.hpp" #include @@ -29,10 +30,10 @@ namespace hailort class CircularStreamBufferPool final : public StreamBufferPool { public: static Expected> create(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t desc_page_size, size_t descs_count, size_t transfer_size); + hailo_dma_buffer_direction_t direction, size_t desc_page_size, size_t descs_count, size_t transfer_size); CircularStreamBufferPool(size_t desc_page_size, size_t descs_count, size_t transfer_size, - BufferPtr &&mapped_buffer); + Buffer &&base_buffer, DmaMappedBuffer &&mappings); virtual size_t max_queue_size() const override; size_t buffers_ready_to_dequeue() const; @@ -41,13 +42,12 @@ class CircularStreamBufferPool final : public StreamBufferPool { virtual hailo_status enqueue(TransferBuffer &&buffer_info) override; - BufferPtr get_mapped_buffer() { return m_mapped_buffer; } + Buffer &get_base_buffer() { return m_base_buffer; } virtual void reset_pointers() override; private: - static Expected allocate_buffer(VdmaDevice &device, - HailoRTDriver::DmaDirection direction, size_t size); + static Expected allocate_buffer(VdmaDevice &device, size_t size); size_t descs_in_transfer() const; @@ -57,7 +57,8 @@ class CircularStreamBufferPool final : public StreamBufferPool { const size_t m_transfer_size; // m_mapped_buffer.size() must be CB_SIZE(m_queue) * m_desc_page_size - BufferPtr m_mapped_buffer; + Buffer m_base_buffer; + DmaMappedBuffer m_mappings; // Head/tail based queue that manages the buffer pool. // The head and tail are in m_desc_page_size granularity. diff --git a/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp new file mode 100644 index 00000000..b770b921 --- /dev/null +++ b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp @@ -0,0 +1,93 @@ +/** + * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file dma_mapped_buffer.cpp
+ **/
+
+#include "hailo/dma_mapped_buffer.hpp"
+#include "hailo/hailort.h"
+#include "hailo/vdevice.hpp"
+
+#include "common/logger_macros.hpp"
+#include "common/utils.hpp"
+
+namespace hailort
+{
+
+class DmaMappedBuffer::Impl final {
+public:
+    Impl(VDevice &vdevice, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status)
+    {
+        create_mapping(vdevice, address, size, direction, status);
+    }
+
+    Impl(Device &device, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status)
+    {
+        create_mapping(device, address, size, direction, status);
+    }
+
+    ~Impl()
+    {
+        if (m_unmap) {
+            m_unmap();
+        }
+    }
+
+    Impl(const Impl&) = delete;
+    Impl& operator=(const Impl&) = delete;
+
+private:
+
+    template <typename DeviceType>
+    void create_mapping(DeviceType &device, void *address, size_t size, hailo_dma_buffer_direction_t direction, hailo_status &status) {
+        status = device.dma_map(address, size, direction);
+        if (HAILO_SUCCESS != status) {
+            LOGGER__ERROR("Failed to map dma buffer, status: {}", status);
+            return;
+        }
+
+        m_unmap = [&device, address, size, direction]() {
+            auto status = device.dma_unmap(address, size, direction);
+            if (HAILO_SUCCESS != status) {
+                LOGGER__ERROR("Failed to unmap dma buffer, status: {}", status);
+            }
+        };
+    }
+
+    std::function<void()> m_unmap;
+};
+
+Expected<DmaMappedBuffer> DmaMappedBuffer::create(VDevice &vdevice, void *user_address, size_t size,
+    hailo_dma_buffer_direction_t direction) {
+
+    hailo_status status = HAILO_UNINITIALIZED;
+    std::unique_ptr<Impl> impl(new (std::nothrow) Impl(vdevice, user_address, size, direction, status));
+    CHECK_NOT_NULL_AS_EXPECTED(impl, HAILO_OUT_OF_HOST_MEMORY);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    return Expected<DmaMappedBuffer>(DmaMappedBuffer{std::move(impl)});
+}
+
+Expected<DmaMappedBuffer> DmaMappedBuffer::create(Device &device, void *user_address, size_t size,
+    hailo_dma_buffer_direction_t direction) {
+
+    hailo_status status = HAILO_UNINITIALIZED;
+    std::unique_ptr<Impl> impl(new (std::nothrow) Impl(device, user_address, size, direction, status));
+    CHECK_NOT_NULL_AS_EXPECTED(impl, HAILO_OUT_OF_HOST_MEMORY);
+    CHECK_SUCCESS_AS_EXPECTED(status);
+
+    return Expected<DmaMappedBuffer>(DmaMappedBuffer{std::move(impl)});
+}
+
+// Defined in cpp since Impl definition is needed.
+DmaMappedBuffer::~DmaMappedBuffer() = default;
+DmaMappedBuffer::DmaMappedBuffer(DmaMappedBuffer &&) = default;
+DmaMappedBuffer &DmaMappedBuffer::operator=(DmaMappedBuffer &&) = default;
+
+DmaMappedBuffer::DmaMappedBuffer(std::unique_ptr<Impl> impl) :
+    m_impl(std::move(impl))
+{}
+
+} /* namespace hailort */
diff --git a/hailort/libhailort/src/os/posix/hailort_driver.cpp b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
similarity index 62%
rename from hailort/libhailort/src/os/posix/hailort_driver.cpp
rename to hailort/libhailort/src/vdma/driver/hailort_driver.cpp
index c3720fc8..36f0c204 100755
--- a/hailort/libhailort/src/os/posix/hailort_driver.cpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp
@@ -1,22 +1,29 @@
-#include "os/hailort_driver.hpp"
-#include "os/driver_scan.hpp"
-#include "hailo_ioctl_common.h"
+/**
+ * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file hailort_driver.cpp + * @brief Low level interface to PCI driver + **/ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" + #include "common/logger_macros.hpp" #include "common/utils.hpp" +#include "hailo_ioctl_common.h" -#include -#include +#if defined(__linux__) #include -#include -#include -#include +#elif defined(__QNX__) #include -#include -#include -#include -#include -#include -#include +#include +#elif defined(_WIN32) +#pragma comment(lib, "cfgmgr32.lib") +#else +#error "unsupported platform!" +#endif namespace hailort { @@ -24,6 +31,20 @@ namespace hailort static_assert(VDMA_CHANNELS_PER_ENGINE == MAX_VDMA_CHANNELS_PER_ENGINE, "Driver and libhailort parameters mismatch"); static_assert(MAX_VDMA_ENGINES == MAX_VDMA_ENGINES_COUNT, "Driver and libhailort parameters mismatch"); static_assert(MIN_D2H_CHANNEL_INDEX == VDMA_DEST_CHANNELS_START, "Driver and libhailort parameters mismatch"); +static_assert(ONGOING_TRANSFERS_SIZE == HAILO_VDMA_MAX_ONGOING_TRANSFERS, "Driver and libhailort parameters mismatch"); +static_assert(MAX_IRQ_TIMESTAMPS_SIZE == CHANNEL_IRQ_TIMESTAMPS_SIZE, "Driver and libhailort parameters mismatch"); + +static_assert(static_cast(InterruptsDomain::NONE) == HAILO_VDMA_INTERRUPTS_DOMAIN_NONE, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::HOST) == HAILO_VDMA_INTERRUPTS_DOMAIN_HOST, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::DEVICE) == HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE, "Driver and libhailort parameters mismatch"); +static_assert(static_cast(InterruptsDomain::BOTH) == + (HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE | HAILO_VDMA_INTERRUPTS_DOMAIN_HOST), "Driver and libhailort parameters mismatch"); + + +#define CHECK_IOCTL_RESULT(err, message) do { \ + auto __err = (err); \ + CHECK(0 == __err, HAILO_DRIVER_FAIL, message " errno: {}", __err); \ + } while (0) static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::DmaDirection direction) { switch (direction) { @@ -41,7 +62,7 @@ static hailo_dma_data_direction direction_to_dma_data_direction(HailoRTDriver::D } static enum hailo_cpu_id translate_cpu_id(hailo_cpu_id_t cpu_id) -{ +{ switch (cpu_id) { case HAILO_CPU_ID_0: return HAILO_CPU_ID_CPU0; @@ -87,31 +108,22 @@ static hailo_transfer_memory_type translate_memory_type(HailoRTDriver::MemoryTyp return HAILO_TRANSFER_MEMORY_MAX_ENUM; } -static Expected create_interrupt_timestamp_list( - hailo_vdma_interrupts_read_timestamp_params &inter_data) -{ - CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL, - "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count); - ChannelInterruptTimestampList timestamp_list{}; - - timestamp_list.count = inter_data.timestamps_count; - for (size_t i = 0; i < timestamp_list.count; i++) { - timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns); - timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed; - } - return timestamp_list; -} - // TODO: validate wraparounds for buffer/mapping handles in the driver (HRT-9509) const uintptr_t HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; const size_t HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE = INVALID_DRIVER_HANDLE_VALUE; const uint8_t 
HailoRTDriver::INVALID_VDMA_CHANNEL_INDEX = INVALID_VDMA_CHANNEL; +#if defined(__linux__) || defined(_WIN32) +const vdma_mapped_buffer_driver_identifier HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER = INVALID_DRIVER_HANDLE_VALUE; +#elif __QNX__ +const vdma_mapped_buffer_driver_identifier HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER = -1; +#else +#error "unsupported platform!" +#endif + Expected> HailoRTDriver::create(const DeviceInfo &device_info) { - auto fd = FileDescriptor(open(device_info.dev_path.c_str(), O_RDWR)); - CHECK_AS_EXPECTED(fd >= 0, HAILO_DRIVER_FAIL, - "Failed to open device file {} with error {}", device_info.dev_path, errno); + TRY(auto fd, open_device_file(device_info.dev_path)); hailo_status status = HAILO_UNINITIALIZED; std::unique_ptr driver(new (std::nothrow) HailoRTDriver(device_info, std::move(fd), status)); @@ -121,45 +133,6 @@ Expected> HailoRTDriver::create(const DeviceInfo return driver; } -#if defined(__linux__) -static bool is_blocking_ioctl(unsigned long request) -{ - switch (request) { - case HAILO_VDMA_INTERRUPTS_WAIT: - case HAILO_FW_CONTROL: - case HAILO_READ_NOTIFICATION: - return true; - default: - return false; - } -} - -hailo_status HailoRTDriver::hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status) -{ - // We lock m_driver lock on all request but the blocking onces. Read m_driver_lock doc in the header - std::unique_lock lock; - if (!is_blocking_ioctl(request)) { - lock = std::unique_lock(m_driver_lock); - } - - int res = ioctl(fd, request, request_struct); - error_status = errno; - return (res >= 0) ? HAILO_SUCCESS : HAILO_DRIVER_FAIL; -} -#elif defined(__QNX__) -hailo_status HailoRTDriver::hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status) -{ - int res = ioctl(fd, static_cast(request), request_struct); - if (0 > res) { - error_status = -res; - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -} -#else -#error "Unsupported platform" -#endif - static hailo_status validate_driver_version(const hailo_driver_info &driver_info) { hailo_version_t library_version{}; @@ -179,24 +152,24 @@ HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, m_device_info(device_info), m_allocate_driver_buffer(false) { - hailo_driver_info driver_info = {}; - int err = 0; - if (HAILO_SUCCESS != (status = hailo_ioctl(m_fd, HAILO_QUERY_DRIVER_INFO, &driver_info, err))) { - LOGGER__ERROR("Failed query driver info, errno {}", err); + hailo_driver_info driver_info{}; + auto err = run_ioctl(HAILO_QUERY_DRIVER_INFO, &driver_info); + if (0 != err) { + LOGGER__ERROR("Failed to query driver info, errno {}", err); + status = HAILO_DRIVER_FAIL; return; } - LOGGER__INFO("Hailo PCIe driver version {}.{}.{}", driver_info.major_version, - driver_info.minor_version, driver_info.revision_version); - status = validate_driver_version(driver_info); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Driver version mismatch, status {}", status); return; } - hailo_device_properties device_properties = {}; - if (HAILO_SUCCESS != (status = hailo_ioctl(m_fd, HAILO_QUERY_DEVICE_PROPERTIES, &device_properties, err))) { + hailo_device_properties device_properties{}; + err = run_ioctl(HAILO_QUERY_DEVICE_PROPERTIES, &device_properties); + if (0 != err) { LOGGER__ERROR("Failed query pcie device properties, errno {}", err); + status = HAILO_DRIVER_FAIL; return; } @@ -226,34 +199,16 @@ HailoRTDriver::HailoRTDriver(const DeviceInfo &device_info, FileDescriptor &&fd, status = HAILO_SUCCESS; } 
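The constructor above refuses to proceed on a library/driver version mismatch before issuing any other ioctl. A minimal sketch of what validate_driver_version() checks; the exact compatibility policy (matching major/minor) and the HAILO_INVALID_DRIVER_VERSION status are assumptions here:

```cpp
// Sketch: libhailort and the kernel driver are versioned together, so reject a
// mismatching major/minor pair up front. Illustrative, not the verbatim implementation.
static hailo_status validate_driver_version_sketch(const hailo_driver_info &driver_info)
{
    hailo_version_t library_version{};
    auto status = hailo_get_library_version(&library_version);
    CHECK_SUCCESS(status);

    CHECK((driver_info.major_version == library_version.major) &&
          (driver_info.minor_version == library_version.minor),
        HAILO_INVALID_DRIVER_VERSION,
        "Driver version ({}.{}.{}) is different from library version ({}.{}.{})",
        driver_info.major_version, driver_info.minor_version, driver_info.revision_version,
        library_version.major, library_version.minor, library_version.revision);

    return HAILO_SUCCESS;
}
```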
-Expected> HailoRTDriver::read_notification() -{ - hailo_d2h_notification notification_buffer = {}; - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_READ_NOTIFICATION, ¬ification_buffer, err); - if (HAILO_SUCCESS != status) { - return make_unexpected(HAILO_DRIVER_FAIL); - } - - std::vector notification(notification_buffer.buffer_len); - memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); - return notification; -} - -hailo_status HailoRTDriver::disable_notifications() +HailoRTDriver::~HailoRTDriver() { - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_DISABLE_NOTIFICATION, 0, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_DISABLE_NOTIFICATION failed with errno: {}", err); - return HAILO_DRIVER_FAIL; + for (const auto &buffer_info : m_mapped_buffer) { + auto status = vdma_buffer_unmap_ioctl(buffer_info.handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to unmap buffer handle {} status {}", buffer_info.handle, status); + } } - - return HAILO_SUCCESS; } -#if defined(__linux__) Expected> HailoRTDriver::scan_devices() { auto device_names = list_devices(); @@ -267,74 +222,6 @@ Expected> HailoRTDriver::scan_devices() } return devices_info; } -#elif defined(__QNX__) -Expected> HailoRTDriver::scan_devices() -{ - auto device_names = list_devices(); - CHECK_EXPECTED(device_names, "Failed listing pcie devices"); - - // TODO: HRT-6785 - support multiple devices - currently device_names is vector of one device - in future will be multiple - std::vector devices_info; - uint32_t index = 0; - for (const auto &device_name : device_names.value()) { - auto device_info = query_device_info(device_name, index); - CHECK_EXPECTED(device_info, "failed parsing device info for {}", device_name); - devices_info.push_back(device_info.release()); - index++; - } - return devices_info; -} -#else -static_assert(true, "Error, Unsupported Platform"); -#endif //defined (__linux__) - -Expected HailoRTDriver::read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size) -{ - CHECK_AS_EXPECTED(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK_AS_EXPECTED(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - hailo_vdma_channel_read_register_params params = { - .engine_index = channel_id.engine_index, - .channel_index = channel_id.channel_index, - .direction = direction_to_dma_data_direction(data_direction), - .offset = offset, - .reg_size = reg_size, - .data = 0 - }; - - int err = 0; - auto status = hailo_ioctl(m_fd, HAILO_VDMA_CHANNEL_READ_REGISTER, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::read_vdma_channel_register failed with errno:{}", err); - return make_unexpected(HAILO_DRIVER_FAIL); - } - - return std::move(params.data); -} - -hailo_status HailoRTDriver::write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, - size_t offset, size_t reg_size, uint32_t data) -{ - CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); - CHECK(data_direction != DmaDirection::BOTH, HAILO_INVALID_ARGUMENT, "Invalid direction given"); - hailo_vdma_channel_write_register_params params = { - .engine_index = channel_id.engine_index, - .channel_index = channel_id.channel_index, - .direction = direction_to_dma_data_direction(data_direction), - .offset = offset, - .reg_size = reg_size, - .data = 
data - }; - - int err = 0; - auto status = hailo_ioctl(m_fd, HAILO_VDMA_CHANNEL_WRITE_REGISTER, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::write_vdma_channel_register failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} hailo_status HailoRTDriver::read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size) { @@ -386,97 +273,6 @@ hailo_status HailoRTDriver::write_memory(MemoryType memory_type, uint64_t addres return HAILO_SUCCESS; } -hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size) -{ - hailo_memory_transfer_params transfer = { - .transfer_direction = TRANSFER_READ, - .memory_type = translate_memory_type(memory_type), - .address = address, - .count = size, - .buffer = {0} - }; - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - if (size > sizeof(transfer.buffer)) { - LOGGER__ERROR("Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - return HAILO_INVALID_ARGUMENT; - } - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &transfer, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::read_memory failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - memcpy(buf, transfer.buffer, transfer.count); - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size) -{ - hailo_memory_transfer_params transfer = { - .transfer_direction = TRANSFER_WRITE, - .memory_type = translate_memory_type(memory_type), - .address = address, - .count = size, - .buffer = {0} - }; - - if (m_dma_type == DmaType::PCIE) { - CHECK(address < std::numeric_limits::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address); - } - - if (size > sizeof(transfer.buffer)) { - LOGGER__ERROR("Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer)); - return HAILO_INVALID_ARGUMENT; - } - - memcpy(transfer.buffer, buf, transfer.count); - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_MEMORY_TRANSFER, &transfer, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HailoRTDriver::write_memory failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - - return HAILO_SUCCESS; -} - -hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, - size_t offset, size_t count) -{ -#if defined(__linux__) - hailo_vdma_buffer_sync_params sync_info{ - .handle = handle, - .sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE, - .offset = offset, - .count = count - }; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_SYNC, &sync_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_VDMA_BUFFER_SYNC failed with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - return HAILO_SUCCESS; -// TODO: HRT-6717 - Remove ifdef when Implement sync ioctl (if determined needed in qnx) -#elif defined( __QNX__) - (void) handle; - (void) sync_direction; - (void) offset; - (void) count; - return HAILO_SUCCESS; -#else -#error "unsupported platform!" 
-#endif // __linux__
-}
-
 hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure)
 {
     CHECK(is_valid_channels_bitmap(channels_bitmap), HAILO_INVALID_ARGUMENT, "Invalid channel bitmap given");
@@ -484,10 +280,7 @@ hailo_status HailoRTDriver::vdma_interrupts_enable(const ChannelsBitmap &channel
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
     params.enable_timestamps_measure = enable_timestamps_measure;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_ENABLE, &params, err);
-    CHECK_SUCCESS(status, "Failed to enable vdma interrupts with errno:{}", err);
-
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_ENABLE, &params), "Failed to enable vdma interrupts");
     return HAILO_SUCCESS;
 }
 
@@ -497,16 +290,25 @@ hailo_status HailoRTDriver::vdma_interrupts_disable(const ChannelsBitmap &channe
     hailo_vdma_interrupts_disable_params params{};
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_DISABLE, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to disable vdma interrupts with errno:{}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_DISABLE, &params), "Failed to disable vdma interrupts");
     return HAILO_SUCCESS;
 }
 
+static Expected<ChannelInterruptTimestampList> create_interrupt_timestamp_list(
+    hailo_vdma_interrupts_read_timestamp_params &inter_data)
+{
+    CHECK_AS_EXPECTED(inter_data.timestamps_count <= MAX_IRQ_TIMESTAMPS_SIZE, HAILO_DRIVER_FAIL,
+        "Invalid channel interrupts timestamps count returned {}", inter_data.timestamps_count);
+    ChannelInterruptTimestampList timestamp_list{};
+
+    timestamp_list.count = inter_data.timestamps_count;
+    for (size_t i = 0; i < timestamp_list.count; i++) {
+        timestamp_list.timestamp_list[i].timestamp = std::chrono::nanoseconds(inter_data.timestamps[i].timestamp_ns);
+        timestamp_list.timestamp_list[i].desc_num_processed = inter_data.timestamps[i].desc_num_processed;
+    }
+    return timestamp_list;
+}
+
 static Expected<IrqData> to_irq_data(const hailo_vdma_interrupts_wait_params& params,
     uint8_t engines_count)
 {
@@ -530,6 +332,7 @@ static Expected<IrqData> to_irq_data(const hailo_vdma_interrupts_wait_params& pa
         irq.channels_irq_data[i].desc_num_processed = params.irq_data[i].host_num_processed;
         irq.channels_irq_data[i].host_error = params.irq_data[i].host_error;
         irq.channels_irq_data[i].device_error = params.irq_data[i].device_error;
+        irq.channels_irq_data[i].validation_success = params.irq_data[i].validation_success;
     }
     return irq;
 }
 
@@ -540,27 +343,40 @@ Expected<IrqData> HailoRTDriver::vdma_interrupts_wait(const ChannelsBitmap &chan
     hailo_vdma_interrupts_wait_params params{};
     std::copy(channels_bitmap.begin(), channels_bitmap.end(), params.channels_bitmap_per_engine);
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_WAIT, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to wait vdma interrupts with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_WAIT, &params), "Failed wait vdma interrupts");
 
     return to_irq_data(params, static_cast<uint8_t>(m_dma_engines_count));
 }
 
 Expected<ChannelInterruptTimestampList> HailoRTDriver::vdma_interrupts_read_timestamps(vdma::ChannelId channel_id)
 {
-    hailo_vdma_interrupts_read_timestamp_params data{};
-    data.engine_index = channel_id.engine_index;
-    data.channel_index = channel_id.channel_index;
+
hailo_vdma_interrupts_read_timestamp_params params{}; + params.engine_index = channel_id.engine_index; + params.channel_index = channel_id.channel_index; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, &data, err); - CHECK_SUCCESS_AS_EXPECTED(status); + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_INTERRUPTS_READ_TIMESTAMPS, ¶ms), "Failed read vdma interrupts timestamps"); + + return create_interrupt_timestamp_list(params); +} + +Expected> HailoRTDriver::read_notification() +{ + hailo_d2h_notification notification_buffer{}; + int result = run_ioctl(HAILO_READ_NOTIFICATION, ¬ification_buffer); + if (result != 0) { + LOGGER__DEBUG("Failed read notification, errno {}", result); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + std::vector notification(notification_buffer.buffer_len); + memcpy(notification.data(), notification_buffer.buffer, notification_buffer.buffer_len); + return notification; +} - return create_interrupt_timestamp_list(data); +hailo_status HailoRTDriver::disable_notifications() +{ + CHECK_IOCTL_RESULT(run_ioctl(HAILO_DISABLE_NOTIFICATION, nullptr), "Failed disable notifications"); + return HAILO_SUCCESS; } hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, const uint8_t request_md5[PCIE_EXPECTED_MD5_LENGTH], @@ -581,15 +397,12 @@ hailo_status HailoRTDriver::fw_control(const void *request, size_t request_len, memcpy(&command.buffer, request, request_len); command.timeout_ms = static_cast(timeout.count()); command.cpu_id = translate_cpu_id(cpu_id); - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_FW_CONTROL, &command, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("HAILO_FW_CONTROL failed with errno:{}", err); - return HAILO_FW_CONTROL_FAILURE; - } + + auto result = run_ioctl(HAILO_FW_CONTROL, &command); + CHECK(result == 0, HAILO_FW_CONTROL_FAILURE, "Failed in fw_control, errno:{}", result); if (*response_len < command.buffer_len) { - LOGGER__ERROR("FW control response len needs to be atleast {} (size given {})", command.buffer_len, *response_len); + LOGGER__ERROR("FW control response len needs to be at least {} (size given {})", command.buffer_len, *response_len); *response_len = command.buffer_len; return HAILO_INSUFFICIENT_BUFFER; } @@ -605,22 +418,15 @@ hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t CHECK_ARG_NOT_NULL(buffer); CHECK_ARG_NOT_NULL(read_bytes); - hailo_read_log_params params { - .cpu_id = translate_cpu_id(cpu_id), - .buffer = {0}, - .buffer_size = buffer_size, - .read_bytes = 0 - }; + hailo_read_log_params params{}; + params.cpu_id = translate_cpu_id(cpu_id); + params.buffer_size = buffer_size; + params.read_bytes = 0; CHECK(buffer_size <= sizeof(params.buffer), HAILO_DRIVER_FAIL, "Given buffer size {} is bigger than buffer size used to read logs {}", buffer_size, sizeof(params.buffer)); - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_READ_LOG, ¶ms, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to read log with errno:{}", err); - return HAILO_DRIVER_FAIL; - } + CHECK_IOCTL_RESULT(run_ioctl(HAILO_READ_LOG, ¶ms), "Failed to read fw log"); CHECK(params.read_bytes <= sizeof(params.buffer), HAILO_DRIVER_FAIL, "Amount of bytes read from log {} is bigger than size of buffer {}", params.read_bytes, sizeof(params.buffer)); @@ -630,163 +436,472 @@ hailo_status HailoRTDriver::read_log(uint8_t *buffer, size_t buffer_size, size_t return HAILO_SUCCESS; } - + hailo_status HailoRTDriver::reset_nn_core() { - int err = 
0; - auto status = hailo_ioctl(this->m_fd, HAILO_RESET_NN_CORE, nullptr, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to reset nn core with errno:{}", err); - return HAILO_DRIVER_FAIL; - } - + CHECK_IOCTL_RESULT(run_ioctl(HAILO_RESET_NN_CORE, nullptr), "Failed reset nn_core"); return HAILO_SUCCESS; } -#if defined(__linux__) Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, - DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) -{ - hailo_vdma_buffer_map_params map_user_buffer_info { - .user_address = user_address, - .size = required_size, - .data_direction = direction_to_dma_data_direction(data_direction), - .allocated_buffer_handle = driver_buff_handle, - .mapped_handle = 0 - }; - - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to map user buffer with errno:{}", err); - return make_unexpected(HAILO_DRIVER_FAIL); + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) { + + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [user_address, required_size, data_direction](const auto& mapped_buffer_info) { + return (mapped_buffer_info.address == user_address) && + (mapped_buffer_info.size == required_size) && + (mapped_buffer_info.direction == data_direction); + }); + if (mapped_buffer != m_mapped_buffer.end()) { + // Buffer already mapped, increase ref count and use it. + assert(mapped_buffer->mapped_count > 0); + CHECK_AS_EXPECTED(mapped_buffer->driver_buff_handle == driver_buff_handle, HAILO_INVALID_ARGUMENT, + "Mapped buffer driver handle {} is different than required handle {}", mapped_buffer->driver_buff_handle, + driver_buff_handle); + + mapped_buffer->mapped_count++; + return Expected(mapped_buffer->handle); + } else { + // Buffer not mapped, map it now + auto handle = vdma_buffer_map_ioctl(user_address, required_size, data_direction, driver_buff_handle); + CHECK_EXPECTED(handle); + + const auto mapping_count = 1; + m_mapped_buffer.emplace_back(MappedBufferInfo { + handle.value(), + user_address, + data_direction, + required_size, + driver_buff_handle, + mapping_count + }); + + return handle.release(); } +} - return VdmaBufferHandle(map_user_buffer_info.mapped_handle); +hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) { + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [handle](const auto& mapped_buffer_info) { + return mapped_buffer_info.handle == handle; + }); + CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer handle {} not found", handle); + + assert(mapped_buffer->mapped_count > 0); + mapped_buffer->mapped_count--; + if (mapped_buffer->mapped_count == 0) { + m_mapped_buffer.erase(mapped_buffer); + return vdma_buffer_unmap_ioctl(handle); + } + return HAILO_SUCCESS; } -#elif defined( __QNX__) -Expected HailoRTDriver::vdma_buffer_map(void *user_address, size_t required_size, - DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) + +hailo_status HailoRTDriver::vdma_buffer_unmap(void *user_address, size_t size, DmaDirection data_direction) { - // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address. 
- (void)user_address; + std::unique_lock mapping_lock(m_mapped_buffer_lock); + auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), + [user_address, size, data_direction](const auto& mapped_buffer_info) { + return (mapped_buffer_info.address == user_address) && + (mapped_buffer_info.size == size) && + (mapped_buffer_info.direction == data_direction); + }); + CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer {} {} not found", + user_address, size); + + assert(mapped_buffer->mapped_count > 0); + mapped_buffer->mapped_count--; + if (mapped_buffer->mapped_count == 0) { + const auto handle = mapped_buffer->handle; + m_mapped_buffer.erase(mapped_buffer); + return vdma_buffer_unmap_ioctl(handle); + } + return HAILO_SUCCESS; +} - // Create shared memory handle to send to driver - shm_handle_t shm_handle; - int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR, - &shm_handle, 0); - if (0 != err) { - LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); - return make_unexpected(HAILO_INTERNAL_FAILURE); +hailo_status HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDirection sync_direction, + size_t offset, size_t count) +{ +#ifndef __QNX__ + hailo_vdma_buffer_sync_params sync_info{}; + sync_info.handle = handle; + sync_info.sync_type = (sync_direction == DmaSyncDirection::TO_HOST) ? HAILO_SYNC_FOR_CPU : HAILO_SYNC_FOR_DEVICE; + sync_info.offset = offset; + sync_info.count = count; + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_SYNC, &sync_info), "Failed sync vdma buffer"); + return HAILO_SUCCESS; +// TODO: HRT-6717 - Remove ifdef when Implement sync ioctl (if determined needed in qnx) +#else /* __QNX__ */ + (void) handle; + (void) sync_direction; + (void) offset; + (void) count; + return HAILO_SUCCESS; +#endif +} + +Expected HailoRTDriver::descriptors_list_create(size_t desc_count, uint16_t desc_page_size, + bool is_circular) +{ + uintptr_t desc_handle = INVALID_DRIVER_HANDLE_VALUE; + uint64_t dma_address = 0; + TRY(std::tie(desc_handle, dma_address), + descriptors_list_create_ioctl(desc_count, desc_page_size, is_circular)); + + auto user_address = descriptors_list_create_mmap(desc_handle, desc_count); + if (!user_address) { + auto status = descriptors_list_release_ioctl(desc_handle); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed releasing descriptors list, status {}", status); + // continue + } + return make_unexpected(user_address.status()); } - hailo_vdma_buffer_map_params map_user_buffer_info { - .shared_memory_handle = shm_handle, - .size = required_size, - .data_direction = direction_to_dma_data_direction(data_direction), - .allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE, - .mapped_handle = 0 - }; + return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()}; +} - // Note: The driver will accept the shm_handle, and will mmap it to its own address space. After the driver maps the - // the shm, calling shm_delete_handle is not needed (but can't harm on the otherhand). - // If the ioctl fails, we can't tell if the shm was mapped or not, so we delete it ourself. 
- auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to map user buffer with errno:{}", err); - shm_delete_handle(shm_handle); - return make_unexpected(HAILO_DRIVER_FAIL); +hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info) +{ + hailo_status status = HAILO_SUCCESS; + + auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count); + if (HAILO_SUCCESS != unmap_status) { + LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status); + status = unmap_status; + // continue } - return VdmaBufferHandle(map_user_buffer_info.mapped_handle); + auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle); + if (HAILO_SUCCESS != release_status) { + LOGGER__ERROR("Descriptors list release status failed with {}", release_status); + status = release_status; + // continue + } + + return status; } -#else -#error "unsupported platform!" -#endif // __linux__ -hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) +hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, + size_t buffer_size, size_t buffer_offset, uint8_t channel_index, uint32_t starting_desc) { - hailo_vdma_buffer_unmap_params unmap_user_buffer_info { - .mapped_handle = handle - }; + hailo_desc_list_bind_vdma_buffer_params config_info{}; + config_info.buffer_handle = buffer_handle; + config_info.buffer_size = buffer_size; + config_info.buffer_offset = buffer_offset; + config_info.desc_handle = desc_handle; + config_info.channel_index = channel_index; + config_info.starting_desc = starting_desc; - int err = 0; - auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_BUFFER_UNMAP, &unmap_user_buffer_info, err); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to unmap user buffer with errno:{}", err); - return HAILO_DRIVER_FAIL; + CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_BIND_VDMA_BUFFER, &config_info), "Failed bind buffer to desc list"); + return HAILO_SUCCESS; +} + +Expected HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, + uint32_t starting_desc, const std::vector &transfer_buffers, + bool should_bind, InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts) +{ + CHECK(is_valid_channel_id(channel_id), HAILO_INVALID_ARGUMENT, "Invalid channel id {} given", channel_id); + CHECK(transfer_buffers.size() <= ARRAY_ENTRIES(hailo_vdma_launch_transfer_params::buffers), HAILO_INVALID_ARGUMENT, + "Invalid transfer buffers size {} given", transfer_buffers.size()); + + hailo_vdma_launch_transfer_params params{}; + params.engine_index = channel_id.engine_index; + params.channel_index = channel_id.channel_index; + params.desc_handle = desc_handle; + params.starting_desc = starting_desc; + params.buffers_count = static_cast(transfer_buffers.size()); + for (size_t i = 0; i < transfer_buffers.size(); i++) { + params.buffers[i].mapped_buffer_handle = transfer_buffers[i].buffer_handle; + params.buffers[i].offset = static_cast(transfer_buffers[i].offset); + params.buffers[i].size = static_cast(transfer_buffers[i].size); } + params.should_bind = should_bind; + params.first_interrupts_domain = (hailo_vdma_interrupts_domain)first_desc_interrupts; + params.last_interrupts_domain = (hailo_vdma_interrupts_domain)last_desc_interrupts; +#ifdef NDEBUG + params.is_debug = false; +#else + 
params.is_debug = true;
+#endif
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LAUNCH_TRANSFER, &params), "Failed launch transfer");
+    return Expected<uint32_t>(params.descs_programed);
+}
+
+#if defined(__linux__)
+Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size)
+{
+    hailo_allocate_low_memory_buffer_params params{};
+    params.buffer_size = size;
+    params.buffer_handle = 0;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC, &params), "Failed to allocate buffer");
+
+    return std::move(params.buffer_handle);
+}
+
+hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle)
+{
+    hailo_free_low_memory_buffer_params params{};
+    params.buffer_handle = buffer_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_LOW_MEMORY_BUFFER_FREE, &params), "Failed to free allocated buffer");
+    return HAILO_SUCCESS;
+}
 
-Expected<DescriptorsListInfo> HailoRTDriver::descriptors_list_create(size_t desc_count, bool is_circular)
+Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size)
 {
-    auto handle_to_dma_address_pair = descriptors_list_create_ioctl(desc_count, is_circular);
-    CHECK_EXPECTED(handle_to_dma_address_pair);
+    auto handle_to_dma_address_pair = continous_buffer_alloc_ioctl(size);
+    if (!handle_to_dma_address_pair) {
+        // Log in continous_buffer_alloc_ioctl
+        return make_unexpected(handle_to_dma_address_pair.status());
+    }
 
     const auto desc_handle = handle_to_dma_address_pair->first;
     const auto dma_address = handle_to_dma_address_pair->second;
 
-    auto user_address = descriptors_list_create_mmap(desc_handle, desc_count);
+    auto user_address = continous_buffer_mmap(desc_handle, size);
     if (!user_address) {
-        auto status = descriptors_list_release_ioctl(desc_handle);
+        auto status = continous_buffer_free_ioctl(desc_handle);
         if (HAILO_SUCCESS != status) {
-            LOGGER__ERROR("Failed releasing descriptors list, status {}", status);
+            LOGGER__ERROR("Failed releasing continuous buffer, status {}", status);
             // continue
         }
         return make_unexpected(user_address.status());
     }
 
-    return DescriptorsListInfo{desc_handle, dma_address, desc_count, user_address.release()};
+    return ContinousBufferInfo{desc_handle, dma_address, size, user_address.release()};
 }
 
-hailo_status HailoRTDriver::descriptors_list_release(const DescriptorsListInfo &descriptors_list_info)
+hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info)
 {
     hailo_status status = HAILO_SUCCESS;
 
-    auto unmap_status = descriptors_list_create_munmap(descriptors_list_info.user_address, descriptors_list_info.desc_count);
+    auto unmap_status = continous_buffer_munmap(buffer_info.user_address, buffer_info.size);
     if (HAILO_SUCCESS != unmap_status) {
-        LOGGER__ERROR("Descriptors list unmap failed with {}", unmap_status);
+        LOGGER__ERROR("Continuous buffer unmap failed with {}", unmap_status);
         status = unmap_status;
         // continue
     }
 
-    auto release_status = descriptors_list_release_ioctl(descriptors_list_info.handle);
+    auto release_status = continous_buffer_free_ioctl(buffer_info.handle);
     if (HAILO_SUCCESS != release_status) {
-        LOGGER__ERROR("Descriptors list release status failed with {}", release_status);
+        LOGGER__ERROR("Continuous buffer release failed with {}", release_status);
         status = release_status;
         // continue
     }
 
     return status;
 }
+#elif defined(__QNX__) || defined(_WIN32)
+
+Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t /* size */)
+{
+    LOGGER__ERROR("Low memory buffer not supported for platform");
+    return make_unexpected(HAILO_NOT_SUPPORTED);
+}
+
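The continuous-buffer pair above (alloc ioctl, then mmap, with the free ioctl run on the mmap error path) is a natural fit for a scope guard. A sketch under these assumptions: the HailoRTDriver reference outlives the guard, and the ContinousBufferInfo field names follow the aggregate initialization above. This is illustrative, not libhailort's actual wrapper:

```cpp
// Sketch: scope-guard a continuous buffer so the free ioctl always runs.
class ContinousBufferGuard final {
public:
    ContinousBufferGuard(HailoRTDriver &driver, ContinousBufferInfo info) :
        m_driver(driver), m_info(info)
    {}

    ~ContinousBufferGuard()
    {
        auto status = m_driver.vdma_continuous_buffer_free(m_info);
        if (HAILO_SUCCESS != status) {
            LOGGER__ERROR("Failed freeing continuous buffer, status {}", status);
        }
    }

    ContinousBufferGuard(const ContinousBufferGuard &) = delete;
    ContinousBufferGuard &operator=(const ContinousBufferGuard &) = delete;

    void *address() const { return m_info.user_address; }
    size_t size() const { return m_info.size; }

private:
    HailoRTDriver &m_driver;
    ContinousBufferInfo m_info;
};

// Usage: TRY(auto info, driver.vdma_continuous_buffer_alloc(size));
//        ContinousBufferGuard guard(driver, info);
```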
+hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t /* buffer_handle */)
+{
+    LOGGER__ERROR("Low memory buffer not supported for platform");
+    return HAILO_NOT_SUPPORTED;
+}
+
+Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t /* size */)
+{
+    LOGGER__ERROR("Continuous buffer not supported for platform");
+    return make_unexpected(HAILO_NOT_SUPPORTED);
+}
+
+hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &/* buffer_info */)
+{
+    LOGGER__ERROR("Continuous buffer not supported for platform");
+    return HAILO_NOT_SUPPORTED;
+}
+
+#else
+#error "unsupported platform!"
+#endif
+
+hailo_status HailoRTDriver::mark_as_used()
+{
+    hailo_mark_as_in_use_params params{};
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MARK_AS_IN_USE, &params), "Failed mark as used");
+    return params.in_use ? HAILO_DEVICE_IN_USE : HAILO_SUCCESS;
+}
+
+#if defined(__linux__)
+static bool is_blocking_ioctl(unsigned long request)
+{
+    switch (request) {
+    case HAILO_VDMA_INTERRUPTS_WAIT:
+    case HAILO_FW_CONTROL:
+    case HAILO_READ_NOTIFICATION:
+        return true;
+    default:
+        return false;
+    }
+}
+
+template <typename PointerType>
+int HailoRTDriver::run_ioctl(uint32_t ioctl_code, PointerType param)
+{
+    // We take m_driver_lock on all requests but the blocking ones. Read the m_driver_lock doc in the header
+    std::unique_lock<std::mutex> lock;
+    if (!is_blocking_ioctl(ioctl_code)) {
+        lock = std::unique_lock<std::mutex>(m_driver_lock);
+    }
+
+    return run_hailo_ioctl(m_fd, ioctl_code, param);
+}
+#elif defined(__QNX__) || defined(_WIN32)
+
+template <typename PointerType>
+int HailoRTDriver::run_ioctl(uint32_t ioctl_code, PointerType param)
+{
+    return run_hailo_ioctl(m_fd, ioctl_code, param);
+}
+#else
+#error "Unsupported platform"
+#endif
+
+hailo_status HailoRTDriver::read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size)
+{
+    CHECK(size != 0, HAILO_INVALID_ARGUMENT, "Invalid size to read");
+    CHECK(buf != nullptr, HAILO_INVALID_ARGUMENT, "Read buffer pointer is NULL");
+
+    if (m_dma_type == DmaType::PCIE) {
+        CHECK(address < std::numeric_limits<uint32_t>::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address);
+    }
+
+    hailo_memory_transfer_params transfer{};
+    transfer.transfer_direction = TRANSFER_READ;
+    transfer.memory_type = translate_memory_type(memory_type);
+    transfer.address = address;
+    transfer.count = size;
+    memset(transfer.buffer, 0, sizeof(transfer.buffer));
+
+    CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT,
+        "Invalid size to read, size given {} is larger than max size {}", size, sizeof(transfer.buffer));
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed read memory");
+
+    memcpy(buf, transfer.buffer, transfer.count);
+
+    return HAILO_SUCCESS;
+}
+
+hailo_status HailoRTDriver::write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size)
+{
+    CHECK(size != 0, HAILO_INVALID_ARGUMENT, "Invalid size to write");
+    CHECK(buf != nullptr, HAILO_INVALID_ARGUMENT, "Write buffer pointer is NULL");
+
+    if (m_dma_type == DmaType::PCIE) {
+        CHECK(address < std::numeric_limits<uint32_t>::max(), HAILO_INVALID_ARGUMENT, "Address out of range {}", address);
+    }
+
+    hailo_memory_transfer_params transfer{};
+    transfer.transfer_direction = TRANSFER_WRITE;
+    transfer.memory_type = translate_memory_type(memory_type);
+    transfer.address = address;
+    transfer.count = size;
+    memset(transfer.buffer, 0, sizeof(transfer.buffer));
+
+    CHECK(size <= sizeof(transfer.buffer), HAILO_INVALID_ARGUMENT,
+        "Invalid size to write, size given {} is larger than max size {}", size, sizeof(transfer.buffer));
+
+    memcpy(transfer.buffer, buf, transfer.count);
+
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed write memory");
+    return HAILO_SUCCESS;
+}
+
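read_memory_ioctl/write_memory_ioctl above are capped at sizeof(transfer.buffer) bytes per call; the public read_memory/write_memory entry points (partially visible earlier in this diff) split larger requests into chunks. A hedged sketch of that chunking loop, where max_single_transfer stands in for the ioctl buffer size and read_chunk stands in for the private read_memory_ioctl; both are assumptions, not the verbatim implementation:

```cpp
#include <algorithm>
#include <cstdint>
#include <functional>

using ChunkReader = std::function<hailo_status(uint64_t address, void *buf, size_t size)>;

// Sketch: split a large read into ioctl-sized chunks, stopping on the first failure.
hailo_status read_memory_in_chunks(uint64_t address, uint8_t *buf, size_t size,
    size_t max_single_transfer, const ChunkReader &read_chunk)
{
    size_t offset = 0;
    while (offset < size) {
        const size_t chunk = std::min(max_single_transfer, size - offset);
        auto status = read_chunk(address + offset, buf + offset, chunk);
        if (HAILO_SUCCESS != status) {
            return status;
        }
        offset += chunk;
    }
    return HAILO_SUCCESS;
}
```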
sizeof(transfer.buffer)); + + memcpy(transfer.buffer, buf, transfer.count); + + CHECK_IOCTL_RESULT(run_ioctl(HAILO_MEMORY_TRANSFER, &transfer), "Failed write memory"); + return HAILO_SUCCESS; +} + +#if defined(__linux__) || defined(_WIN32) +Expected HailoRTDriver::vdma_buffer_map_ioctl(void *user_address, size_t required_size, + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) +{ + hailo_vdma_buffer_map_params map_user_buffer_info{}; + map_user_buffer_info.user_address = user_address; + map_user_buffer_info.size = required_size; + map_user_buffer_info.data_direction = direction_to_dma_data_direction(data_direction); + map_user_buffer_info.allocated_buffer_handle = driver_buff_handle; + map_user_buffer_info.mapped_handle = 0; + + CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info), "Failed map vdma buffer"); -Expected> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count, bool is_circular) + return std::move(map_user_buffer_info.mapped_handle); +} +#elif defined(__QNX__) +Expected HailoRTDriver::vdma_buffer_map_ioctl(void *user_address, size_t required_size, + DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle) { + // Mapping is done by the driver_buff_handle (shm file descriptor), and not by address. + (void)user_address; + CHECK(driver_buff_handle != INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER, HAILO_NOT_SUPPORTED, + "On QNX only shared-memory buffers are allowed to be mapped"); + + // Create shared memory handle to send to driver + shm_handle_t shm_handle; + int err = shm_create_handle(driver_buff_handle, m_resource_manager_pid, O_RDWR, + &shm_handle, 0); + if (0 != err) { + LOGGER__ERROR("Error creating shm object handle, errno is: {}", errno); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + hailo_vdma_buffer_map_params map_user_buffer_info { + .shared_memory_handle = shm_handle, + .size = required_size, + .data_direction = direction_to_dma_data_direction(data_direction), + .allocated_buffer_handle = INVALID_DRIVER_HANDLE_VALUE, + .mapped_handle = 0 + }; + + // Note: The driver will accept the shm_handle, and will mmap it to its own address space. After the driver maps the + // the shm, calling shm_delete_handle is not needed (but can't harm on the otherhand). + // If the ioctl fails, we can't tell if the shm was mapped or not, so we delete it ourself. + err = run_ioctl(HAILO_VDMA_BUFFER_MAP, &map_user_buffer_info); + if (err != 0) { + LOGGER__ERROR("Failed to map user buffer with errno:{}", err); + shm_delete_handle(shm_handle); + return make_unexpected(HAILO_DRIVER_FAIL); + } + + return VdmaBufferHandle(map_user_buffer_info.mapped_handle); +} +#else +#error "unsupported platform!" 
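A minimal caller-side sketch of how the map/unmap pair above is meant to be used. This is a hypothetical helper, not part of this diff; the public `vdma_buffer_map`/`vdma_buffer_unmap` signatures and the `VdmaBufferHandle` scoping are assumptions based on the private ioctl wrappers:

```cpp
#include <functional>

// Sketch only: map a user buffer for H2D DMA, run the caller's work function,
// and always unmap, reporting the first error encountered.
static hailo_status with_mapped_buffer(hailort::HailoRTDriver &driver, void *user_address, size_t size,
    const std::function<hailo_status(hailort::VdmaBufferHandle)> &work)
{
    auto handle = driver.vdma_buffer_map(user_address, size, hailort::HailoRTDriver::DmaDirection::H2D,
        hailort::HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER);
    CHECK_EXPECTED_AS_STATUS(handle); // propagate mapping failure

    const auto work_status = work(handle.value());

    // Unmap even if the work failed; the first failure wins.
    const auto unmap_status = driver.vdma_buffer_unmap(handle.value());
    return (HAILO_SUCCESS != work_status) ? work_status : unmap_status;
}
```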
+
+hailo_status HailoRTDriver::vdma_buffer_unmap_ioctl(VdmaBufferHandle handle)
+{
+    hailo_vdma_buffer_unmap_params unmap_user_buffer_info{};
+    unmap_user_buffer_info.mapped_handle = handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_BUFFER_UNMAP, &unmap_user_buffer_info), "Failed unmap vdma buffer");
+    return HAILO_SUCCESS;
+}
+
+Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::descriptors_list_create_ioctl(size_t desc_count,
+    uint16_t desc_page_size, bool is_circular)
+{
+    CHECK(is_powerof2(desc_page_size), HAILO_INVALID_ARGUMENT, "Invalid desc page size {}", desc_page_size);
+
     hailo_desc_list_create_params create_desc_info{};
     create_desc_info.desc_count = desc_count;
+    create_desc_info.desc_page_size = desc_page_size;
     create_desc_info.is_circular = is_circular;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_CREATE, &create_desc_info, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to create descriptors list with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_CREATE, &create_desc_info), "Failed create desc list");
 
     return std::make_pair(create_desc_info.desc_handle, create_desc_info.dma_address);
 }
 
 hailo_status HailoRTDriver::descriptors_list_release_ioctl(uintptr_t desc_handle)
 {
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_RELEASE, &desc_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to release descriptors list with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    struct hailo_desc_list_release_params params{};
+    params.desc_handle = desc_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_DESC_LIST_RELEASE, &params), "Failed release desc list");
     return HAILO_SUCCESS;
 }
 
@@ -826,12 +941,7 @@ Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_hand
         .user_address = nullptr,
     };
 
-    int err = 0;
-    auto status = HailoRTDriver::hailo_ioctl(m_fd, HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Mmap descriptors list ioctl failed with errno:{}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_NON_LINUX_DESC_LIST_MMAP, &map_vdma_list_params), "Failed mmap descriptors list");
 
     void *address = mmap(nullptr, buffer_size, PROT_WRITE | PROT_READ | PROT_NOCACHE, MAP_SHARED | MAP_PHYS, NOFD,
         (off_t)map_vdma_list_params.user_address);
@@ -850,119 +960,37 @@ hailo_status HailoRTDriver::descriptors_list_create_munmap(void *address, size_t
     return HAILO_SUCCESS;
 }
 
-#else
-#error "unsupported platform!"
-#endif
-
-hailo_status HailoRTDriver::descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
-    uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc)
+#elif defined(_WIN32)
+Expected<void *> HailoRTDriver::descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count)
 {
-    hailo_desc_list_bind_vdma_buffer_params config_info;
-    config_info.buffer_handle = buffer_handle;
-    config_info.desc_handle = desc_handle;
-    config_info.desc_page_size = desc_page_size;
-    config_info.channel_index = channel_index;
-    config_info.starting_desc = starting_desc;
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_DESC_LIST_BIND_VDMA_BUFFER, &config_info, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to bind vdma buffer to descriptors list with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
-    return HAILO_SUCCESS;
+    hailo_non_linux_desc_list_mmap_params params{};
+    params.desc_handle = desc_handle;
+    params.size = desc_count * SIZE_OF_SINGLE_DESCRIPTOR;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_NON_LINUX_DESC_LIST_MMAP, &params), "Failed mmap desc list");
+    void *user_address = params.user_address;
+    return user_address;
 }
 
-Expected<uintptr_t> HailoRTDriver::vdma_low_memory_buffer_alloc(size_t size)
+hailo_status HailoRTDriver::descriptors_list_create_munmap(void *, size_t)
 {
-    CHECK_AS_EXPECTED(m_allocate_driver_buffer, HAILO_INVALID_OPERATION,
-        "Tried to allocate buffer from driver even though operation is not supported");
-
-    hailo_allocate_low_memory_buffer_params allocate_params = {
-        .buffer_size = size,
-        .buffer_handle = 0
-    };
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_LOW_MEMORY_BUFFER_ALLOC, &allocate_params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to allocate buffer with errno: {}", err);
-        return make_unexpected(HAILO_DRIVER_FAIL);
-    }
-
-    return std::move(allocate_params.buffer_handle);
-}
-
-hailo_status HailoRTDriver::vdma_low_memory_buffer_free(uintptr_t buffer_handle)
-{
-    CHECK(m_allocate_driver_buffer, HAILO_INVALID_OPERATION,
-        "Tried to free allocated buffer from driver even though operation is not supported");
-
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_LOW_MEMORY_BUFFER_FREE, (void*)buffer_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to free allocated buffer with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
-    return HAILO_SUCCESS;
+    // On Windows, the unmap is done on the release ioctl
+    return HAILO_SUCCESS;
 }
-
+#else
+#error "unsupported platform!"
+#endif
 
 #if defined(__linux__)
-Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t size)
-{
-    auto handle_to_dma_address_pair = continous_buffer_alloc_ioctl(size);
-    if (!handle_to_dma_address_pair) {
-        // Log in continous_buffer_alloc_ioctl
-        return make_unexpected(handle_to_dma_address_pair.status());
-    }
-
-    const auto desc_handle = handle_to_dma_address_pair->first;
-    const auto dma_address = handle_to_dma_address_pair->second;
-
-    auto user_address = continous_buffer_mmap(desc_handle, size);
-    if (!user_address) {
-        auto status = continous_buffer_free_ioctl(desc_handle);
-        if (HAILO_SUCCESS != status) {
-            LOGGER__ERROR("Failed releasing conitnous buffer, status {}", status);
-            // continue
-        }
-        return make_unexpected(user_address.status());
-    }
-
-    return ContinousBufferInfo{desc_handle, dma_address, size, user_address.release()};
-}
-
-hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &buffer_info)
-{
-    hailo_status status = HAILO_SUCCESS;
-
-    auto unmap_status = continous_buffer_munmap(buffer_info.user_address, buffer_info.size);
-    if (HAILO_SUCCESS != unmap_status) {
-        LOGGER__ERROR("Continous buffer list unmap failed with {}", unmap_status);
-        status = unmap_status;
-        // continue
-    }
-
-    auto release_status = continous_buffer_free_ioctl(buffer_info.handle);
-    if (HAILO_SUCCESS != release_status) {
-        LOGGER__ERROR("Continous buffer release status failed with {}", release_status);
-        status = release_status;
-        // continue
-    }
-
-    return status;
-}
 
 Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::continous_buffer_alloc_ioctl(size_t size)
 {
-    hailo_allocate_continuous_buffer_params params { .buffer_size = size, .buffer_handle = 0, .dma_address = 0 };
+    hailo_allocate_continuous_buffer_params params{};
+    params.buffer_size = size;
+    params.buffer_handle = 0;
+    params.dma_address = 0;
 
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, &params, err);
-    if (HAILO_SUCCESS != status) {
+    int err = run_ioctl(HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC, &params);
+    if (err != 0) {
         if (ENOMEM == err) {
            LOGGER__WARN("Failed to allocate continuous buffer, size 0x{:x}. This failure means there is not a sufficient amount of CMA memory", size);
@@ -975,15 +1003,11 @@ Expected<std::pair<uintptr_t, uint64_t>> HailoRTDriver::continous_buffer_alloc_i
     return std::make_pair(params.buffer_handle, params.dma_address);
 }
 
-hailo_status HailoRTDriver::continous_buffer_free_ioctl(uintptr_t desc_handle)
+hailo_status HailoRTDriver::continous_buffer_free_ioctl(uintptr_t buffer_handle)
 {
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_VDMA_CONTINUOUS_BUFFER_FREE, (void*)desc_handle, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to free continuous buffer with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-
+    hailo_free_continuous_buffer_params params{};
+    params.buffer_handle = buffer_handle;
+    CHECK_IOCTL_RESULT(run_ioctl(HAILO_VDMA_CONTINUOUS_BUFFER_FREE, &params), "Failed free continuous buffer");
     return HAILO_SUCCESS;
 }
 
@@ -1009,41 +1033,8 @@ hailo_status HailoRTDriver::continous_buffer_munmap(void *address, size_t size)
     return HAILO_SUCCESS;
 }
 
-#elif defined(__QNX__)
-
-Expected<ContinousBufferInfo> HailoRTDriver::vdma_continuous_buffer_alloc(size_t /* size */)
-{
-    LOGGER__ERROR("Continous buffer not supported for platform");
-    return make_unexpected(HAILO_NOT_SUPPORTED);
-}
-
-hailo_status HailoRTDriver::vdma_continuous_buffer_free(const ContinousBufferInfo &/* buffer_info */)
-{
-    LOGGER__ERROR("Continous buffer not supported for platform");
-    return HAILO_NOT_SUPPORTED;
-}
-
-#else
-#error "unsupported platform!"
 #endif
 
-hailo_status HailoRTDriver::mark_as_used()
-{
-    hailo_mark_as_in_use_params params = {
-        .in_use = false
-    };
-    int err = 0;
-    auto status = hailo_ioctl(this->m_fd, HAILO_MARK_AS_IN_USE, &params, err);
-    if (HAILO_SUCCESS != status) {
-        LOGGER__ERROR("Failed to mark device as in use with errno: {}", err);
-        return HAILO_DRIVER_FAIL;
-    }
-    if (params.in_use) {
-        return HAILO_DEVICE_IN_USE;
-    }
-    return HAILO_SUCCESS;
-}
-
 bool HailoRTDriver::is_valid_channel_id(const vdma::ChannelId &channel_id)
 {
     return (channel_id.engine_index < m_dma_engines_count) && (channel_id.channel_index < MAX_VDMA_CHANNELS_PER_ENGINE);
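The ENOMEM branch above deserves a note: continuous buffers are carved out of the kernel's CMA pool, so allocation can fail even when plenty of ordinary RAM is free. A standalone diagnostic sketch (not part of this diff) for inspecting the pool on Linux, relying only on the standard `CmaFree` field of `/proc/meminfo`:

```cpp
#include <fstream>
#include <iostream>
#include <string>

// Reads CmaFree from /proc/meminfo (present when the kernel is built with CMA).
// Returns the value in kB, or -1 if the field is missing.
static long read_cma_free_kb()
{
    std::ifstream meminfo("/proc/meminfo");
    std::string line;
    while (std::getline(meminfo, line)) {
        if (line.rfind("CmaFree:", 0) == 0) { // line starts with "CmaFree:"
            return std::stol(line.substr(line.find(':') + 1));
        }
    }
    return -1;
}

int main()
{
    std::cout << "CmaFree: " << read_cma_free_kb() << " kB\n";
    return 0;
}
```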
diff --git a/hailort/libhailort/src/os/hailort_driver.hpp b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
similarity index 80%
rename from hailort/libhailort/src/os/hailort_driver.hpp
rename to hailort/libhailort/src/vdma/driver/hailort_driver.hpp
index 50f242f1..b5f99f92 100755
--- a/hailort/libhailort/src/os/hailort_driver.hpp
+++ b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp
@@ -24,6 +24,8 @@
 #include
 #include
 #include
+#include
+#include
 
 #ifdef __QNX__
 #include
@@ -75,6 +77,7 @@ struct ChannelIrqData {
     uint16_t desc_num_processed;
     uint8_t host_error;
     uint8_t device_error;
+    bool validation_success;
 };
 
 struct IrqData {
@@ -85,7 +88,7 @@ struct IrqData {
 // Bitmap per engine
 using ChannelsBitmap = std::array<uint32_t, MAX_VDMA_ENGINES_COUNT>;
 
-#if defined(__linux__) || defined(_MSC_VER)
+#if defined(__linux__) || defined(_WIN32)
 // Unique handle returned from the driver.
 using vdma_mapped_buffer_driver_identifier = uintptr_t;
 #elif defined(__QNX__)
 using vdma_mapped_buffer_driver_identifier = int;
 #else
 #error "unsupported platform!"
-#endif // defined(__linux__) || defined(_MSC_VER)
+#endif
 
 struct DescriptorsListInfo {
     uintptr_t handle; // Unique identifier for the driver.
@@ -109,6 +112,25 @@ struct ContinousBufferInfo {
     void *user_address;
 };
 
+enum class InterruptsDomain
+{
+    NONE   = 0,
+    DEVICE = 1 << 0,
+    HOST   = 1 << 1,
+    BOTH   = DEVICE | HOST
+};
+
+inline InterruptsDomain operator|(InterruptsDomain a, InterruptsDomain b)
+{
+    return static_cast<InterruptsDomain>(static_cast<uint32_t>(a) | static_cast<uint32_t>(b));
+}
+
+inline InterruptsDomain& operator|=(InterruptsDomain &a, InterruptsDomain b)
+{
+    a = a | b;
+    return a;
+}
+
 class HailoRTDriver final
 {
 public:
@@ -157,23 +179,13 @@ class HailoRTDriver final
 
     static Expected<std::unique_ptr<HailoRTDriver>> create(const DeviceInfo &device_info);
 
-// TODO: HRT-7309 add implementation for Windows
-#if defined(__linux__) || defined(__QNX__)
-    hailo_status hailo_ioctl(int fd, unsigned long request, void* request_struct, int &error_status);
-#endif // defined(__linux__) || defined(__QNX__)
+    ~HailoRTDriver();
 
     static Expected<std::vector<DeviceInfo>> scan_devices();
 
     hailo_status read_memory(MemoryType memory_type, uint64_t address, void *buf, size_t size);
     hailo_status write_memory(MemoryType memory_type, uint64_t address, const void *buf, size_t size);
 
-    Expected<uint32_t> read_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset,
-        size_t reg_size);
-    hailo_status write_vdma_channel_register(vdma::ChannelId channel_id, DmaDirection data_direction, size_t offset,
-        size_t reg_size, uint32_t data);
-
-    hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count);
-
     hailo_status vdma_interrupts_enable(const ChannelsBitmap &channels_bitmap, bool enable_timestamps_measure);
     hailo_status vdma_interrupts_disable(const ChannelsBitmap &channel_id);
     Expected<IrqData> vdma_interrupts_wait(const ChannelsBitmap &channels_bitmap);
@@ -214,15 +226,20 @@ class HailoRTDriver final
      * Unmaps user buffer mapped using HailoRTDriver::map_buffer.
      */
     hailo_status vdma_buffer_unmap(VdmaBufferHandle handle);
+    hailo_status vdma_buffer_unmap(void *user_address, size_t size, DmaDirection data_direction);
+
+    hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count);
 
     /**
      * Allocate vdma descriptors list object that can bind to some buffer. Used for scatter gather vdma.
      *
-     * @param[in] desc_count - number of descriptors to allocate. The descriptor max size is DESC_MAX_SIZE.
+     * @param[in] desc_count - number of descriptors to allocate. The descriptor max size is desc_page_size.
+     * @param[in] desc_page_size - maximum size of each descriptor. Must be a power of 2.
     * @param[in] is_circular - if true, the descriptors list can be used in a circular (and desc_count must be power
      *  of 2)
      */
-    Expected<DescriptorsListInfo> descriptors_list_create(size_t desc_count, bool is_circular);
+    Expected<DescriptorsListInfo> descriptors_list_create(size_t desc_count, uint16_t desc_page_size,
+        bool is_circular);
 
     /**
     * Frees a vdma descriptors buffer allocated by 'descriptors_list_create'.
@@ -233,7 +250,21 @@ class HailoRTDriver final
      * Configure vdma channel descriptors to point to the given user address.
      */
     hailo_status descriptors_list_bind_vdma_buffer(uintptr_t desc_handle, VdmaBufferHandle buffer_handle,
-        uint16_t desc_page_size, uint8_t channel_index, uint32_t starting_desc);
+        size_t buffer_size, size_t buffer_offset, uint8_t channel_index,
+        uint32_t starting_desc);
+
+    struct TransferBuffer {
+        VdmaBufferHandle buffer_handle;
+        size_t offset;
+        size_t size;
+    };
+
+    /**
+     * Launches a transfer on the given channel.
+     */
+    Expected<uint32_t> launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle,
+        uint32_t starting_desc, const std::vector<TransferBuffer> &transfer_buffer, bool should_bind,
+        InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts);
 
     Expected<uintptr_t> vdma_low_memory_buffer_alloc(size_t size);
     hailo_status vdma_low_memory_buffer_free(uintptr_t buffer_handle);
@@ -297,12 +328,21 @@ class HailoRTDriver final
     static const uintptr_t INVALID_DRIVER_BUFFER_HANDLE_VALUE;
     static const size_t INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE;
     static const uint8_t INVALID_VDMA_CHANNEL_INDEX;
+    static const vdma_mapped_buffer_driver_identifier INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER;
 
 private:
+    template<typename PointerType>
+    int run_ioctl(uint32_t ioctl_code, PointerType param);
+
     hailo_status read_memory_ioctl(MemoryType memory_type, uint64_t address, void *buf, size_t size);
     hailo_status write_memory_ioctl(MemoryType memory_type, uint64_t address, const void *buf, size_t size);
 
-    Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create_ioctl(size_t desc_count, bool is_circular);
+    Expected<VdmaBufferHandle> vdma_buffer_map_ioctl(void *user_address, size_t required_size,
+        DmaDirection data_direction, const vdma_mapped_buffer_driver_identifier &driver_buff_handle);
+    hailo_status vdma_buffer_unmap_ioctl(VdmaBufferHandle handle);
+
+    Expected<std::pair<uintptr_t, uint64_t>> descriptors_list_create_ioctl(size_t desc_count, uint16_t desc_page_size,
+        bool is_circular);
     hailo_status descriptors_list_release_ioctl(uintptr_t desc_handle);
     Expected<void *> descriptors_list_create_mmap(uintptr_t desc_handle, size_t desc_count);
     hailo_status descriptors_list_create_munmap(void *address, size_t desc_count);
@@ -346,6 +386,20 @@ class HailoRTDriver final
     // Need to refactor the driver lock mechanism and then remove the mutex from here.
     std::mutex m_driver_lock;
 #endif
+
+    // TODO HRT-11937: when ioctl is combined, move caching to driver
+    struct MappedBufferInfo {
+        VdmaBufferHandle handle;
+        void *address;
+        DmaDirection direction;
+        size_t size;
+        vdma_mapped_buffer_driver_identifier driver_buff_handle;
+        size_t mapped_count;
+    };
+
+    std::mutex m_mapped_buffer_lock;
+    std::list<MappedBufferInfo> m_mapped_buffer;
+
 };
 
 inline hailo_dma_buffer_direction_t to_hailo_dma_direction(HailoRTDriver::DmaDirection dma_direction)
@@ -356,6 +410,14 @@ inline hailo_dma_buffer_direction_t to_hailo_dma_direction(HailoRTDriver::DmaDir
         HAILO_DMA_BUFFER_DIRECTION_MAX_ENUM;
 }
 
+inline HailoRTDriver::DmaDirection to_hailo_driver_direction(hailo_dma_buffer_direction_t dma_direction)
+{
+    assert(dma_direction <= HAILO_DMA_BUFFER_DIRECTION_BOTH);
+    return (dma_direction == HAILO_DMA_BUFFER_DIRECTION_H2D) ? HailoRTDriver::DmaDirection::H2D :
+        (dma_direction == HAILO_DMA_BUFFER_DIRECTION_D2H) ? HailoRTDriver::DmaDirection::D2H :
+        HailoRTDriver::DmaDirection::BOTH;
+}
+
 } /* namespace hailort */
 
 #endif /* _HAILORT_DRIVER_HPP_ */
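Since `InterruptsDomain` moved into this header (it was previously defined in descriptor_list.hpp, removed further down), a quick illustration of the bit-flag semantics the overloaded operators provide. Sketch only; it assumes the header above is on the include path:

```cpp
#include "vdma/driver/hailort_driver.hpp" // path per this PR

using hailort::InterruptsDomain;

// Combine per-side interrupt requests for the last descriptor of a transfer.
InterruptsDomain last_desc_domain(bool host_irq, bool device_irq)
{
    InterruptsDomain domain = InterruptsDomain::NONE;
    if (host_irq) {
        domain |= InterruptsDomain::HOST;   // host-side (driver) interrupt
    }
    if (device_irq) {
        domain |= InterruptsDomain::DEVICE; // device (firmware) side interrupt
    }
    return domain; // HOST | DEVICE == BOTH, matching the enum definition
}
```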
diff --git a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
new file mode 100644
index 00000000..01a40d66
--- /dev/null
+++ b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp
@@ -0,0 +1,71 @@
+/**
+ * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+**/
+/**
+ * @file driver_os_specific.hpp
+ * @brief Contains functions for the hailort driver that have OS-specific implementations.
+ **/
+
+#ifndef _HAILO_DRIVER_OS_SPECIFIC_HPP_
+#define _HAILO_DRIVER_OS_SPECIFIC_HPP_
+
+#include "hailo/expected.hpp"
+#include "os/file_descriptor.hpp"
+#include "vdma/driver/hailort_driver.hpp"
+
+#ifdef _WIN32
+#include "hailo_ioctl_common.h" // for tCompatibleHailoIoctlData
+#endif
+
+namespace hailort
+{
+
+Expected<FileDescriptor> open_device_file(const std::string &path);
+Expected<std::vector<std::string>> list_devices();
+Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_name);
+
+#ifndef _WIN32
+
+// Runs the ioctl, returns errno value (or 0 on success)
+int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param);
+
+#else /* _WIN32 */
+
+/**
+ * On Windows, all IOCTLs share the same structure for input and output (tCompatibleHailoIoctlData).
+ * To keep the Windows and POSIX code paths the same, we need to convert the actual structure type (for example
+ * hailo_memory_transfer_params) to the compatible structure (tCompatibleHailoIoctlData::Buffer::MemoryTransfer).
+ *
+ * This template static class is used to convert to compatible (for input parameters) and from compatible (for output
+ * parameters).
+ */
+template<typename PointerType>
+class WindowsIoctlParamCast final {
+public:
+    static tCompatibleHailoIoctlData to_compatible(PointerType param_ptr);
+    static void from_compatible(const tCompatibleHailoIoctlData& data, PointerType param_ptr);
+};
+
+
+int run_ioctl_compatible_data(underlying_handle_t file, uint32_t ioctl_code, tCompatibleHailoIoctlData& data);
+
+// Runs the ioctl, returns GetLastError() value (or 0 on success)
+template<typename PointerType>
+int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, PointerType param)
+{
+    static_assert(
+        (std::is_pointer<PointerType>::value) || (std::is_same<PointerType, nullptr_t>::value),
+        "run_ioctl accepts only a pointer or nullptr_t as param");
+
+    tCompatibleHailoIoctlData data = WindowsIoctlParamCast<PointerType>::to_compatible(param);
+    int result = run_ioctl_compatible_data(file, ioctl_code, data);
+    WindowsIoctlParamCast<PointerType>::from_compatible(data, param);
+    return result;
+}
+
+#endif
+
+} /* namespace hailort */
+
+#endif /* _HAILO_DRIVER_OS_SPECIFIC_HPP_ */
diff --git a/hailort/libhailort/src/os/posix/linux/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
similarity index 76%
rename from hailort/libhailort/src/os/posix/linux/driver_scan.cpp
rename to hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
index 6ba7daed..f66c6e2c 100644
--- a/hailort/libhailort/src/os/posix/linux/driver_scan.cpp
+++ b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp
@@ -3,14 +3,21 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file driver_scan.cpp
- * @brief Parse pcie driver sysfs
+ * @file driver_os_specific.cpp
+ * @brief Implementation for Linux.
**/ -#include "os/driver_scan.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" + +#include "common/utils.hpp" + #include #include #include +#include +#include +#include +#include namespace hailort { @@ -18,6 +25,13 @@ namespace hailort #define HAILO_CLASS_PATH ("/sys/class/hailo_chardev") #define HAILO_BOARD_LOCATION_FILENAME ("board_location") +Expected open_device_file(const std::string &path) +{ + int fd = open(path.c_str(), O_RDWR); + CHECK(fd >= 0, HAILO_DRIVER_FAIL, + "Failed to open device file {} with error {}", path, errno); + return FileDescriptor(fd); +} Expected> list_devices() { @@ -66,4 +80,9 @@ Expected query_device_info(const std::string &device_ return device_info; } +int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param) { + int res = ioctl(file, ioctl_code, param); + return (res < 0) ? errno : 0; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/os/posix/qnx/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp similarity index 75% rename from hailort/libhailort/src/os/posix/qnx/driver_scan.cpp rename to hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp index 5c422ebd..99dc2bf0 100644 --- a/hailort/libhailort/src/os/posix/qnx/driver_scan.cpp +++ b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp @@ -3,12 +3,17 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file driver_scan.cpp - * @brief Get list and parse pcie driver info + * @file driver_os_specific.cpp + * @brief Implementation for QNX. **/ -#include "os/driver_scan.hpp" +#include "vdma/driver/os/driver_os_specific.hpp" #include +#include +#include +#include +#include + extern "C" { #include } @@ -21,6 +26,14 @@ namespace hailort // Every device name will start with "hailo" #define HAILO_PCIE_DEVICE_NAME_PREFIX ("hailo") +Expected open_device_file(const std::string &path) +{ + int fd = open(path.c_str(), O_RDWR); + CHECK(fd >= 0, HAILO_DRIVER_FAIL, + "Failed to open device file {} with error {}", path, errno); + return FileDescriptor(fd); +} + Expected> list_devices() { DIR *dir_iter = opendir(HAILO_PCIE_CLASS_PATH); @@ -54,11 +67,12 @@ Expected> list_devices() return devices; } -Expected query_device_info(const std::string &device_name, uint32_t index) +Expected query_device_info(const std::string &device_name) { HailoRTDriver::DeviceInfo dev_info = {}; - // pci_device_find finds all relevant devices - find specific using index + // Multiple devices not supported on QNX + const auto index = 0; pci_bdf_t pci_dev = pci_device_find(index, HAILO_VENDOR_ID, PCI_DID_ANY, PCI_CCODE_ANY); if (PCI_BDF_NONE == pci_dev) { LOGGER__ERROR("Error finding relevant device"); @@ -71,4 +85,9 @@ Expected query_device_info(const std::string &device_ return dev_info; } +int run_hailo_ioctl(underlying_handle_t file, uint32_t ioctl_code, void *param) { + int res = ioctl(file, ioctl_code, param); + return (res < 0) ? 
-res : 0;
+}
+
 } /* namespace hailort */
diff --git a/hailort/libhailort/src/os/windows/driver_scan.cpp b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
similarity index 54%
rename from hailort/libhailort/src/os/windows/driver_scan.cpp
rename to hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
index cec1bb6c..04e8c239 100644
--- a/hailort/libhailort/src/os/windows/driver_scan.cpp
+++ b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp
@@ -3,16 +3,17 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file driver_scan.cpp
- * @brief Get list and parse pcie driver info
+ * @file driver_os_specific.cpp
+ * @brief Implementation for Windows.
 */
+#include "vdma/driver/os/driver_os_specific.hpp"
+
 #include "os/windows/osdep.hpp"
 #include "common/logger_macros.hpp"
 #include "common/utils.hpp"
 #include "common/os/windows/string_conversion.hpp"
-#include "os/driver_scan.hpp"
-#include "../../../../drivers/win/include/Public.h"
+#include "hailo_ioctl_common.h"
 
 namespace hailort
 {
@@ -140,6 +141,22 @@ CDeviceInterfaceProperty::CDeviceInterfaceProperty(
     PostProcess(cr);
 }
 
+Expected<FileDescriptor> open_device_file(const std::string &dev_path)
+{
+    auto handle = CreateFileA(
+        dev_path.c_str(),
+        GENERIC_READ | GENERIC_WRITE,
+        FILE_SHARE_READ,
+        NULL,
+        OPEN_EXISTING,
+        FILE_FLAG_OVERLAPPED,
+        NULL);
+    CHECK(handle != INVALID_HANDLE_VALUE, HAILO_DRIVER_FAIL, "Failed creating hailo driver file {}, error {}",
+        dev_path, GetLastError());
+
+    return FileDescriptor(handle);
+}
+
 Expected<std::vector<std::string>> list_devices()
 {
     GUID guid = GUID_DEVINTERFACE_HailoKM;
@@ -205,4 +222,85 @@ Expected<HailoRTDriver::DeviceInfo> query_device_info(const std::string &device_
     return device_info;
 }
 
+/**
+ * To reduce boilerplate code, we use the COMPATIBLE_PARAM_CAST macro to generate the template specialization for each
+ * parameter type. The macro accepts the struct type and its member name in the compatible structure.
+ */
+#define COMPATIBLE_PARAM_CAST(ParamType, NameInCompatible) \
+    template<> \
+    tCompatibleHailoIoctlData WindowsIoctlParamCast<ParamType *>::to_compatible(ParamType * param_ptr) { \
+        tCompatibleHailoIoctlData data{}; \
+        data.Buffer.NameInCompatible = *(param_ptr); \
+        return data; \
+    } \
+    \
+    template<> \
+    void WindowsIoctlParamCast<ParamType *>::from_compatible(const tCompatibleHailoIoctlData &data, \
+        ParamType *param_ptr) { \
+        *(param_ptr) = data.Buffer.NameInCompatible; \
+    }
+
+COMPATIBLE_PARAM_CAST(hailo_memory_transfer_params, MemoryTransfer);
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_enable_params, VdmaInterruptsEnable)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_disable_params, VdmaInterruptsDisable)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_read_timestamp_params, VdmaInterruptsReadTimestamps)
+COMPATIBLE_PARAM_CAST(hailo_vdma_interrupts_wait_params, VdmaInterruptsWait)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_sync_params, VdmaBufferSync)
+COMPATIBLE_PARAM_CAST(hailo_fw_control, FirmwareControl)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_map_params, VdmaBufferMap)
+COMPATIBLE_PARAM_CAST(hailo_vdma_buffer_unmap_params, VdmaBufferUnmap)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_create_params, DescListCreate)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_release_params, DescListReleaseParam)
+COMPATIBLE_PARAM_CAST(hailo_desc_list_bind_vdma_buffer_params, DescListBind)
+COMPATIBLE_PARAM_CAST(hailo_d2h_notification, D2HNotification)
+COMPATIBLE_PARAM_CAST(hailo_device_properties, DeviceProperties)
+COMPATIBLE_PARAM_CAST(hailo_driver_info, DriverInfo)
+COMPATIBLE_PARAM_CAST(hailo_non_linux_desc_list_mmap_params, DescListMmap)
+COMPATIBLE_PARAM_CAST(hailo_read_log_params, ReadLog)
+COMPATIBLE_PARAM_CAST(hailo_mark_as_in_use_params, MarkAsInUse)
+COMPATIBLE_PARAM_CAST(hailo_vdma_launch_transfer_params, LaunchTransfer)
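For reference, one expansion of the macro written out by hand (this is what the preprocessor generates for `hailo_memory_transfer_params`, modulo the template arguments reconstructed above):

```cpp
// Expansion of COMPATIBLE_PARAM_CAST(hailo_memory_transfer_params, MemoryTransfer):
template<>
tCompatibleHailoIoctlData WindowsIoctlParamCast<hailo_memory_transfer_params *>::to_compatible(
    hailo_memory_transfer_params *param_ptr)
{
    tCompatibleHailoIoctlData data{};
    data.Buffer.MemoryTransfer = *(param_ptr); // copy the POSIX-style struct into the shared buffer
    return data;
}

template<>
void WindowsIoctlParamCast<hailo_memory_transfer_params *>::from_compatible(
    const tCompatibleHailoIoctlData &data, hailo_memory_transfer_params *param_ptr)
{
    *(param_ptr) = data.Buffer.MemoryTransfer; // copy results back out after the ioctl
}
```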
+
+// Special handling for nullptr_t. This case occurs when no parameters are passed.
+template<>
+tCompatibleHailoIoctlData WindowsIoctlParamCast<nullptr_t>::to_compatible(nullptr_t data)
+{
+    (void) data;
+    return tCompatibleHailoIoctlData{};
+}
+
+template<>
+void WindowsIoctlParamCast<nullptr_t>::from_compatible(const tCompatibleHailoIoctlData &compatible, nullptr_t data)
+{
+    (void) compatible;
+    (void) data;
+}
+
+int run_ioctl_compatible_data(underlying_handle_t file, uint32_t ioctl_code, tCompatibleHailoIoctlData& data)
+{
+    data.Parameters.u.value = ioctl_code;
+    FileDescriptor event = CreateEvent(NULL, true, false, NULL);
+    if (event == nullptr) {
+        const auto last_error = GetLastError();
+        LOGGER__ERROR("Failed creating event {}", last_error);
+        return static_cast<int>(last_error);
+    }
+
+    OVERLAPPED overlapped{};
+    RtlZeroMemory(&overlapped, sizeof(overlapped));
+    overlapped.hEvent = event;
+
+    ULONG returned = 0;
+    bool res = DeviceIoControl(file, HAILO_IOCTL_COMPATIBLE, &data, sizeof(data),
+        &data, sizeof(data), &returned, &overlapped);
+    if (!res) {
+        ULONG last_error = GetLastError();
+        if (last_error != ERROR_IO_PENDING) {
+            return static_cast<int>(last_error);
+        }
+        if (!GetOverlappedResult(file, &overlapped, &returned, true)) {
+            return static_cast<int>(GetLastError());
+        }
+    }
+
+    return 0;
+}
+
 } /* namespace hailort */
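Because the device file is opened with FILE_FLAG_OVERLAPPED, every DeviceIoControl may complete asynchronously; the ERROR_IO_PENDING / GetOverlappedResult dance above turns that back into a synchronous call. The same pattern condensed into a standalone sketch (standard Win32 API only, not part of this diff):

```cpp
#include <windows.h>

// Sketch: synchronous DeviceIoControl over a handle opened with FILE_FLAG_OVERLAPPED.
// Returns 0 on success or a Win32 error code, mirroring run_ioctl_compatible_data above.
static ULONG sync_ioctl(HANDLE file, DWORD code, void *buf, DWORD size)
{
    OVERLAPPED overlapped{};
    overlapped.hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); // manual-reset event
    if (overlapped.hEvent == NULL) {
        return GetLastError();
    }

    DWORD returned = 0;
    ULONG result = 0;
    if (!DeviceIoControl(file, code, buf, size, buf, size, &returned, &overlapped)) {
        const ULONG last_error = GetLastError();
        if (last_error != ERROR_IO_PENDING) {
            result = last_error; // the call failed outright
        } else if (!GetOverlappedResult(file, &overlapped, &returned, TRUE)) {
            result = GetLastError(); // the asynchronous completion failed
        }
    }
    CloseHandle(overlapped.hEvent);
    return result;
}
```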
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
index 24c0c72b..ec27e22b 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp
@@ -8,6 +8,7 @@
 
 #include "buffer_requirements.hpp"
 #include "vdma/memory/descriptor_list.hpp"
+#include "vdma/memory/continuous_edge_layer.hpp"
 #include "utils.h"
 
 #include
@@ -15,66 +16,43 @@ namespace hailort {
 namespace vdma {
 
-// Minimum size of ccb buffers in descriptors, taken from the CCB spec.
-static constexpr uint32_t MIN_CCB_DESCRIPTORS_COUNT = 16;
-
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_single_transfer(
-    uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size,
-    bool is_circular, const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer)
-{
-    // First, get the result for the min size
-    auto results = get_sg_buffer_requirements_multiple_transfers(max_desc_page_size, min_batch_size,
-        {transfer_size}, is_circular, force_default_page_size, force_batch_size);
-    CHECK_EXPECTED(results);
-
-    // In order to fetch all descriptors, the amount of active descs is lower by one that the amount
-    // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
-    // Therefore we add 1 in order to compensate.
-    uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size());
-    if (!is_vdma_aligned_buffer) {
-        // Add desc for boundary channel because might need extra descriptor for user non aligned buffer async API
-        descs_per_transfer++;
-    }
-    uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, MAX_DESCS_COUNT);
-    if (is_circular) {
-        descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
-    }
-
-    return BufferSizesRequirements{ descs_count, results->desc_page_size() };
-}
-
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_requirements_multiple_transfers(
-    uint16_t max_desc_page_size, uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes,
-    bool is_circular, const bool force_default_page_size, const bool force_batch_size)
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_multiple_transfers(
+    vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t batch_size,
+    const std::vector<uint32_t> &transfer_sizes, bool is_circular, bool force_default_page_size,
+    bool force_batch_size)
 {
-    const uint16_t initial_desc_page_size = find_initial_desc_page_size(transfer_sizes, max_desc_page_size, force_default_page_size);
-
-    CHECK_AS_EXPECTED(max_desc_page_size <= MAX_DESC_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
+    const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT;
+    const uint32_t MIN_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_DESCS_COUNT : MIN_CCB_DESCS_COUNT;
+    const uint16_t MAX_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_PAGE_SIZE : MAX_CCB_PAGE_SIZE;
+    const uint16_t MIN_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_PAGE_SIZE : MIN_CCB_PAGE_SIZE;
+
+    const uint16_t initial_desc_page_size = find_initial_desc_page_size(buffer_type, transfer_sizes, max_desc_page_size,
+        force_default_page_size, MIN_PAGE_SIZE);
+
+    CHECK_AS_EXPECTED(max_desc_page_size <= MAX_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
         "max_desc_page_size given {} is bigger than hw max desc page size {}",
-        max_desc_page_size, MAX_DESC_PAGE_SIZE);
-    CHECK_AS_EXPECTED(MIN_DESC_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
+        max_desc_page_size, MAX_PAGE_SIZE);
+    CHECK_AS_EXPECTED(MIN_PAGE_SIZE <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
         "max_desc_page_size given {} is lower that hw min desc page size {}",
-        max_desc_page_size, MIN_DESC_PAGE_SIZE);
+        max_desc_page_size, MIN_PAGE_SIZE);
 
-    const uint16_t min_desc_page_size = MIN_DESC_PAGE_SIZE;
     CHECK_AS_EXPECTED(initial_desc_page_size <= max_desc_page_size, HAILO_INTERNAL_FAILURE,
         "Initial descriptor page size ({}) is larger than maximum descriptor page size ({})",
         initial_desc_page_size, max_desc_page_size);
-    CHECK_AS_EXPECTED(initial_desc_page_size >= min_desc_page_size, HAILO_INTERNAL_FAILURE,
+    CHECK_AS_EXPECTED(initial_desc_page_size >= MIN_PAGE_SIZE, HAILO_INTERNAL_FAILURE,
         "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})",
-        initial_desc_page_size, min_desc_page_size);
+        initial_desc_page_size, MIN_PAGE_SIZE);
 
     CHECK_AS_EXPECTED(MAX_DESCS_COUNT >= get_required_descriptor_count(transfer_sizes, max_desc_page_size),
-        HAILO_OUT_OF_DESCRIPTORS,
-        "Network shapes exceeds driver descriptors capabilities."
-        "Minimal descriptors count: {}, max allowed on the driver: {}."
-        "(A common cause for this error could be the large transfer size - which is {}).",
-        get_required_descriptor_count(transfer_sizes, max_desc_page_size), (MAX_DESCS_COUNT - 1),
-        std::accumulate(transfer_sizes.begin(), transfer_sizes.end(), 0));
+        HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
 
     // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow)
-    uint32_t local_desc_page_size = initial_desc_page_size;
+    auto local_desc_page_size = static_cast<uint32_t>(initial_desc_page_size);
 
-    uint32_t descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size);
+    auto descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size);
 
     // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used
     while ((descs_count * batch_size) > (MAX_DESCS_COUNT - 1)) {
         CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size << 1), HAILO_INTERNAL_FAILURE,
@@ -83,11 +61,7 @@ Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_require
 
         if (local_desc_page_size > max_desc_page_size) {
             if (force_batch_size) {
-                LOGGER__ERROR("Network shapes and batch size exceeds driver descriptors capabilities. "
-                    "Required descriptors count: {}, max allowed on the driver: {}. "
-                    "(A common cause for this error could be the batch size - which is {}).",
-                    (batch_size * descs_count), (MAX_DESCS_COUNT - 1), batch_size);
-                return make_unexpected(HAILO_OUT_OF_DESCRIPTORS);
+                return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
             } else {
                 // If not forcing minimum batch (It's acceptable to run infer on lower batch instead of returning error)
                 // once reached over the max page size, stop
@@ -110,44 +84,63 @@ Expected<BufferSizesRequirements> BufferSizesRequirements::get_sg_buffer_require
         // The length of a descriptor list is always a power of 2. Therefore, on circular buffers the hw will have to
         // access all descriptors.
         descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
-        CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_OUT_OF_DESCRIPTORS);
+        CHECK_AS_EXPECTED(descs_count <= MAX_DESCS_COUNT, HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
     }
 
     return BufferSizesRequirements{descs_count, desc_page_size};
 }
 
-Expected<BufferSizesRequirements> BufferSizesRequirements::get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
-    uint32_t transfer_size, bool is_circular)
+Expected<BufferSizesRequirements> BufferSizesRequirements::get_buffer_requirements_single_transfer(
+    vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size,
+    uint32_t transfer_size, bool is_circular, bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer)
 {
-    const uint16_t desc_page_size = DEFAULT_DESC_PAGE_SIZE;
-    const auto desc_per_transfer = DIV_ROUND_UP(transfer_size, desc_page_size);
-    auto descs_count = desc_per_transfer * batch_size;
-    descs_count = std::max(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+    const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT;
+    const uint32_t MIN_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        MIN_SG_DESCS_COUNT : MIN_CCB_DESCS_COUNT;
+
+    // First, get the result for the min size
+    auto results = get_buffer_requirements_multiple_transfers(buffer_type, max_desc_page_size,
+        min_batch_size, {transfer_size}, is_circular, force_default_page_size, force_batch_size);
+    if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == results.status()) {
+        // In case the requirements can't be met, return without logging an error.
+        return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS);
+    }
+    CHECK_EXPECTED(results);
+
+    uint32_t descs_per_transfer = DIV_ROUND_UP(transfer_size, results->desc_page_size());
+    if (!is_vdma_aligned_buffer) {
+        // Add a desc for the boundary channel, since the async API might need an extra descriptor for a non-aligned user buffer
+        descs_per_transfer++;
+    }
+
+    // In order to fetch all descriptors, the amount of active descs is lower by one than the amount
+    // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
+    // Therefore we add 1 in order to compensate.
+    uint32_t descs_count = std::min((descs_per_transfer * max_batch_size) + 1, static_cast<uint32_t>(MAX_DESCS_COUNT));
+    descs_count = std::max(descs_count, MIN_DESCS_COUNT);
     if (is_circular) {
-        // The first 12 channels in D2H CCB ("regular channels") requires that the amount of descriptors will be a power
-        // of 2.
-        // We can optimize it by checking that channel index is one of the last 4 channels ("enhanced channels"), or
-        // even allocate those indexes.
-        // Meanwhile however, we always use power of 2
-        descs_count = get_nearest_powerof_2(descs_count, MIN_CCB_DESCRIPTORS_COUNT);
+        descs_count = get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
     }
 
-    return BufferSizesRequirements{descs_count, desc_page_size};
+    return BufferSizesRequirements{ descs_count, results->desc_page_size() };
 }
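To make the sizing concrete, a worked example of the arithmetic in get_buffer_requirements_single_transfer under assumed inputs (numbers are illustrative only):

```cpp
// Illustrative arithmetic for a scatter-gather, circular edge layer:
//   transfer_size = 100000 bytes, desc_page_size = 512, max_batch_size = 4,
//   non-vdma-aligned user buffer (async API).
//
// descs_per_transfer = DIV_ROUND_UP(100000, 512) = 196
// +1 descriptor for the possibly non-aligned user buffer     -> 197
// descs_count = 197 * 4 + 1 (extra desc so full != empty)    -> 789
// circular lists must be a power of two                      -> 1024
static_assert(((100000 + 511) / 512) == 196, "DIV_ROUND_UP example");
static_assert(196 + 1 == 197 && 197 * 4 + 1 == 789, "descriptor count example");
```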
-
-uint16_t BufferSizesRequirements::find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes,
-    const uint16_t max_desc_page_size, const bool force_default_page_size)
+uint16_t BufferSizesRequirements::find_initial_desc_page_size(
+    vdma::VdmaBuffer::Type buffer_type, const std::vector<uint32_t> &transfer_sizes,
+    uint16_t max_desc_page_size, bool force_default_page_size, uint16_t min_page_size)
 {
-    const uint16_t channel_max_page_size = std::min(DEFAULT_DESC_PAGE_SIZE, max_desc_page_size);
+    static const uint16_t DEFAULT_PAGE_SIZE = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ?
+        DEFAULT_SG_PAGE_SIZE : DEFAULT_CCB_PAGE_SIZE;
+    const uint16_t channel_max_page_size = std::min(DEFAULT_PAGE_SIZE, max_desc_page_size);
     const auto max_transfer_size = *std::max_element(transfer_sizes.begin(), transfer_sizes.end());
 
-    // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_DESC_PAGE_SIZE
-    // is the optimal value. For transfer_sizes smaller than DEFAULT_DESC_PAGE_SIZE using smaller descriptor page
-    // sizes will save memory consuption without harming performance. In the case of nms for example, only one bbox
-    // is copied from each page. Hence, we'll use MIN_DESC_PAGE_SIZE for nms.
+    // Note: If the pages pointed to by the descriptors are copied in their entirety, then DEFAULT_PAGE_SIZE
+    // is the optimal value. For transfer_sizes smaller than DEFAULT_PAGE_SIZE using smaller descriptor page
+    // sizes will save memory consumption without harming performance. In the case of nms for example, only one bbox
+    // is copied from each page. Hence, we'll use min_page_size for nms.
     const auto optimize_low_page_size = ((channel_max_page_size > max_transfer_size) && !force_default_page_size);
     const uint16_t initial_desc_page_size = optimize_low_page_size ?
-        static_cast<uint16_t>(get_nearest_powerof_2(max_transfer_size, MIN_DESC_PAGE_SIZE)) :
+        static_cast<uint16_t>(get_nearest_powerof_2(max_transfer_size, min_page_size)) :
         channel_max_page_size;
     if (channel_max_page_size != initial_desc_page_size) {
         LOGGER__INFO("Using non-default initial_desc_page_size of {}, due to a small transfer size ({})",
diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
index c709887f..7d1126a8 100644
--- a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
+++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp
@@ -12,6 +12,7 @@
 #define _HAILO_BUFFER_REQUIREMENTS_HPP_
 
 #include "hailo/expected.hpp"
+#include "vdma/memory/vdma_edge_layer.hpp"
 
 #include
 #include
 
@@ -35,19 +36,19 @@ class BufferSizesRequirements final {
     uint16_t desc_page_size() const { return m_desc_page_size; }
     uint32_t buffer_size() const { return m_descs_count * m_desc_page_size; }
 
-    static Expected<BufferSizesRequirements> get_sg_buffer_requirements_single_transfer(uint16_t max_desc_page_size,
-        uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular,
-        const bool force_default_page_size, const bool force_batch_size, const bool is_vdma_aligned_buffer);
-    static Expected<BufferSizesRequirements> get_sg_buffer_requirements_multiple_transfers(uint16_t max_desc_page_size,
+    static Expected<BufferSizesRequirements> get_buffer_requirements_multiple_transfers(
+        vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size,
         uint16_t batch_size, const std::vector<uint32_t> &transfer_sizes, bool is_circular,
-        const bool force_default_page_size, const bool force_batch_size);
+        bool force_default_page_size, bool force_batch_size);
 
-    static Expected<BufferSizesRequirements> get_ccb_buffer_requirements_single_transfer(uint16_t batch_size,
-        uint32_t transfer_size, bool is_circular);
+    static Expected<BufferSizesRequirements> get_buffer_requirements_single_transfer(
+        vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size,
+        uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular,
+        bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer);
 
 private:
-    static uint16_t find_initial_desc_page_size(const std::vector<uint32_t> &transfer_sizes, const uint16_t max_desc_page_size,
-        const bool force_default_page_size);
+    static uint16_t find_initial_desc_page_size(vdma::VdmaBuffer::Type buffer_type, const std::vector<uint32_t> &transfer_sizes,
+        uint16_t max_desc_page_size, bool force_default_page_size, uint16_t min_page_size);
 
     static uint32_t get_required_descriptor_count(const std::vector<uint32_t> &transfer_sizes, uint16_t desc_page_size);
 
     const uint32_t m_descs_count;
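The low-page-size optimization in find_initial_desc_page_size is easiest to see with numbers. A worked example with illustrative values:

```cpp
// Worked example for find_initial_desc_page_size (illustrative numbers):
//   buffer_type = SCATTER_GATHER, max_desc_page_size = 4096, force_default_page_size = false
//   channel_max_page_size = min(DEFAULT_SG_PAGE_SIZE /*512*/, 4096) = 512
//   max_transfer_size = 100 bytes (e.g. one NMS bbox per transfer)
//   100 < 512 and the default is not forced, so the low-page-size path is taken:
//   initial_desc_page_size = get_nearest_powerof_2(100, MIN_SG_PAGE_SIZE /*64*/) = 128
// Each descriptor page then wastes at most 28 bytes instead of 412.
static_assert(64 < 128 && 128 < 512, "128 is the smallest power of 2 >= 100 that is also >= 64");
```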
diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
index f975fe17..c2443f05 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp
@@ -3,25 +3,20 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file continuous_buffer.hpp
+ * @file continuous_buffer.cpp
  * @brief Continuous physical vdma buffer.
 **/
 
 #include "continuous_buffer.hpp"
 
-/* TODO - Support non default CCB page sizes */
-#define CCB_PAGE_SIZE (512)
-#define MAX_PAGES_PER_INTERRUPT (0x0003FFFF)
-#define MAX_CCB_BUFFER_SIZE (CCB_PAGE_SIZE * MAX_PAGES_PER_INTERRUPT)
-
 namespace hailort {
 namespace vdma {
 
 Expected<ContinuousBuffer> ContinuousBuffer::create(size_t size, HailoRTDriver &driver)
 {
-    if (size > MAX_CCB_BUFFER_SIZE) {
-        LOGGER__INFO("continious memory size {} must be smaller/equal to {}.", size, MAX_CCB_BUFFER_SIZE);
-        return make_unexpected(HAILO_OUT_OF_HOST_CMA_MEMORY);
+    if (size < MIN_CCB_PAGE_SIZE * MIN_CCB_DESCS_COUNT) {
+        LOGGER__ERROR("continuous memory size ({}) must be larger/equal to {}.", size, (MIN_CCB_PAGE_SIZE * MIN_CCB_DESCS_COUNT));
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
     }
 
     auto result = driver.vdma_continuous_buffer_alloc(size);
@@ -56,17 +51,6 @@ uint64_t ContinuousBuffer::dma_address() const
     return m_buffer_info.dma_address;
 }
 
-uint16_t ContinuousBuffer::desc_page_size() const
-{
-    // Currently we support only the default desc page size, TODO: HRT-5381 support more desc page size?
-    return DEFAULT_DESC_PAGE_SIZE;
-}
-
-uint32_t ContinuousBuffer::descs_count() const
-{
-    return descriptors_in_buffer(m_buffer_info.size);
-}
-
 hailo_status ContinuousBuffer::read(void *buf_dst, size_t count, size_t offset)
 {
     CHECK((count + offset) <= m_buffer_info.size, HAILO_INSUFFICIENT_BUFFER,
@@ -87,17 +71,8 @@ hailo_status ContinuousBuffer::write(const void *buf_src, size_t count, size_t o
     return HAILO_SUCCESS;
 }
 
-Expected<uint32_t> ContinuousBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
-    size_t desc_offset)
-{
-    (void)last_desc_interrupts_domain;
-    (void)desc_offset;
-
-    // The descriptors in continuous mode are programmed by the hw, nothing to do here.
-    return descriptors_in_buffer(transfer_size);
-}
-
-ContinuousBuffer::ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info) :
+ContinuousBuffer::ContinuousBuffer(HailoRTDriver &driver,
+    const ContinousBufferInfo &buffer_info) :
     m_driver(driver),
     m_buffer_info(buffer_info)
 {}
diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
index a4c109b2..1ad50ac7 100644
--- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
+++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp
@@ -3,17 +3,22 @@
 * Distributed under the MIT license (https://opensource.org/licenses/MIT)
 **/
 /**
- * @file continuous_buffer.hpp
- * @brief Continuous physical vdma buffer.
+ * @file continuous_edge_layer.hpp
+ * @brief Continuous physical vdma edge layer.
 **/
 
 #ifndef _HAILO_VDMA_CONTINUOUS_BUFFER_HPP_
 #define _HAILO_VDMA_CONTINUOUS_BUFFER_HPP_
 
-#include "os/hailort_driver.hpp"
+#include "vdma/driver/hailort_driver.hpp"
 #include "os/mmap_buffer.hpp"
 #include "vdma/memory/vdma_buffer.hpp"
 
+#define MAX_CCB_DESCS_COUNT (0x00040000)
+#define MIN_CCB_DESCS_COUNT (16u)
+#define MAX_CCB_PAGE_SIZE (4096)
+#define MIN_CCB_PAGE_SIZE (512)
+#define DEFAULT_CCB_PAGE_SIZE (512)
 
 namespace hailort {
 namespace vdma {
@@ -41,16 +46,10 @@ class ContinuousBuffer final : public VdmaBuffer {
     }
 
     virtual size_t size() const override;
-    virtual uint64_t dma_address() const override;
-    virtual uint16_t desc_page_size() const override;
-    virtual uint32_t descs_count() const override;
-
     virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override;
     virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override;
 
-    virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
-        size_t desc_offset) override;
-
+    uint64_t dma_address() const;
 private:
     ContinuousBuffer(HailoRTDriver &driver, const ContinousBufferInfo &buffer_info);
diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp
new file mode 100644
index 00000000..d3feb008
--- /dev/null
+++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp
@@ -0,0 +1,70 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file continuous_edge_layer.cpp
+ * @brief Continuous physical vdma edge layer.
+ **/
+
+#include "continuous_edge_layer.hpp"
+
+namespace hailort {
+namespace vdma {
+
+Expected<ContinuousEdgeLayer> ContinuousEdgeLayer::create(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+    uint16_t page_size, uint32_t num_pages)
+{
+    if (num_pages > MAX_CCB_DESCS_COUNT) {
+        LOGGER__INFO("continuous memory number of pages {} must be smaller/equal to {}.", num_pages, MAX_CCB_DESCS_COUNT);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    if (page_size > MAX_CCB_PAGE_SIZE) {
+        LOGGER__INFO("continuous memory page size {} must be smaller/equal to {}.", page_size, MAX_CCB_PAGE_SIZE);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    if (buffer->size() < offset + size) {
+        LOGGER__ERROR("Edge layer is not fully inside the connected buffer. buffer size is {} while edge layer offset {} and size {}",
+            buffer->size(), offset, size);
+        return make_unexpected(HAILO_INTERNAL_FAILURE);
+    }
+
+    return ContinuousEdgeLayer(std::move(buffer), size, offset, page_size, num_pages);
+}
+
+uint64_t ContinuousEdgeLayer::dma_address() const
+{
+    return (std::dynamic_pointer_cast<ContinuousBuffer>(m_buffer))->dma_address() + m_offset;
+}
+
+uint16_t ContinuousEdgeLayer::desc_page_size() const
+{
+    return m_page_size;
+}
+
+uint32_t ContinuousEdgeLayer::descs_count() const
+{
+    return m_num_pages;
+}
+
+Expected<uint32_t> ContinuousEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
+    size_t desc_offset)
+{
+    (void)last_desc_interrupts_domain;
+    (void)desc_offset;
+
+    // The descriptors in continuous mode are programmed by the hw, nothing to do here.
+    return descriptors_in_buffer(transfer_size);
+}
+
+ContinuousEdgeLayer::ContinuousEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+    uint16_t page_size, uint32_t num_pages) :
+    VdmaEdgeLayer(std::move(buffer), size, offset),
+    m_page_size(page_size),
+    m_num_pages(num_pages)
+{}
+
+}; /* namespace vdma */
+}; /* namespace hailort */
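The new split has ContinuousBuffer owning the CMA allocation while ContinuousEdgeLayer is a sized, offset view into it, so several edge layers can share one allocation. A hedged usage sketch; the exact `shared_ptr` parameter type and error handling are assumptions based on the signatures above:

```cpp
// Sketch only: carve two 32 KB views out of one 64 KB continuous allocation.
// Assumes `buffer` is a std::shared_ptr<vdma::VdmaBuffer> holding a ContinuousBuffer
// obtained via ContinuousBuffer::create(); error handling elided.
auto layer_a = vdma::ContinuousEdgeLayer::create(
    std::shared_ptr<vdma::VdmaBuffer>(buffer), /*size=*/32 * 1024, /*offset=*/0,
    /*page_size=*/512, /*num_pages=*/64);

auto layer_b = vdma::ContinuousEdgeLayer::create(
    std::shared_ptr<vdma::VdmaBuffer>(buffer), /*size=*/32 * 1024, /*offset=*/32 * 1024,
    /*page_size=*/512, /*num_pages=*/64);
// Each call validates offset + size against buffer->size(), per the check above.
```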
diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
new file mode 100644
index 00000000..515c4f6e
--- /dev/null
+++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp
@@ -0,0 +1,57 @@
+/**
+ * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT)
+ **/
+/**
+ * @file continuous_edge_layer.hpp
+ * @brief Continuous physical vdma edge layer.
+ **/
+
+#ifndef _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_
+#define _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_
+
+#include "vdma/driver/hailort_driver.hpp"
+#include "os/mmap_buffer.hpp"
+#include "vdma/memory/vdma_edge_layer.hpp"
+#include "vdma/memory/continuous_buffer.hpp"
+
+
+namespace hailort {
+namespace vdma {
+
+class ContinuousEdgeLayer final : public VdmaEdgeLayer {
+public:
+    static Expected<ContinuousEdgeLayer> create(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+        uint16_t page_size, uint32_t num_pages);
+
+    virtual ~ContinuousEdgeLayer() = default;
+
+    ContinuousEdgeLayer(const ContinuousEdgeLayer &) = delete;
+    ContinuousEdgeLayer(ContinuousEdgeLayer &&) = default;
+    ContinuousEdgeLayer& operator=(const ContinuousEdgeLayer &) = delete;
+    ContinuousEdgeLayer& operator=(ContinuousEdgeLayer &&) = delete;
+
+    virtual Type type() const override
+    {
+        return Type::CONTINUOUS;
+    }
+
+    virtual uint64_t dma_address() const override;
+    virtual uint16_t desc_page_size() const override;
+    virtual uint32_t descs_count() const override;
+
+    virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain,
+        size_t desc_offset) override;
+
+private:
+    ContinuousEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, size_t size, size_t offset,
+        uint16_t page_size, uint32_t num_pages);
+
+    const uint16_t m_page_size;
+    const uint32_t m_num_pages;
+};
+
+}; /* namespace vdma */
+}; /* namespace hailort */
+
+#endif /* _HAILO_VDMA_CONTINUOUS_EDGE_LAYER_HPP_ */
diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
index baf39dd3..2452cb4f 100644
--- a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
+++ b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp
@@ -37,8 +37,8 @@ Expected<DescriptorList> DescriptorList::create(uint32_t desc_count, uint16_t de
     hailo_status status = HAILO_UNINITIALIZED;
     assert(desc_page_size <= driver.desc_max_page_size());
 
-    CHECK_AS_EXPECTED(desc_count <= MAX_DESCS_COUNT, HAILO_INVALID_ARGUMENT,
-        "descs_count {} must be smaller/equal to {}", desc_count, MAX_DESCS_COUNT);
+    CHECK_AS_EXPECTED(desc_count <= MAX_SG_DESCS_COUNT, HAILO_INVALID_ARGUMENT,
+        "descs_count {} must be smaller/equal to {}", desc_count, MAX_SG_DESCS_COUNT);
 
     DescriptorList object(desc_count, desc_page_size, is_circular, driver, status);
     if (HAILO_SUCCESS != status) {
@@ -63,7 +63,7 @@ DescriptorList::DescriptorList(uint32_t desc_count, uint16_t desc_page_size, boo
         return;
     }
 
-    auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_is_circular);
+    auto desc_list_info = m_driver.descriptors_list_create(desc_count, m_desc_page_size, m_is_circular);
     if (!desc_list_info) {
         status = desc_list_info.status();
         return;
     }
@@ -96,15 +96,16 @@ DescriptorList::DescriptorList(DescriptorList &&other) noexcept :
     m_desc_list_info.user_address = std::exchange(other.m_desc_list_info.user_address, nullptr);
 }
 
-hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc)
+hailo_status DescriptorList::configure_to_use_buffer(MappedBuffer& buffer, size_t buffer_size,
+    size_t buffer_offset, ChannelId channel_id, uint32_t starting_desc)
 {
     const auto desc_list_capacity = m_desc_page_size * count();
-    CHECK(buffer.size() <= desc_list_capacity, HAILO_INVALID_ARGUMENT,
+    CHECK(buffer_size <= desc_list_capacity, HAILO_INVALID_ARGUMENT,
         "Can't bind a buffer larger than the descriptor list's capacity. Buffer size {}, descriptor list capacity {}",
-        buffer.size(), desc_list_capacity);
+        buffer_size, desc_list_capacity);
 
-    return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), m_desc_page_size,
-        channel_id.channel_index, starting_desc);
+    return m_driver.descriptors_list_bind_vdma_buffer(m_desc_list_info.handle, buffer.handle(), buffer_size,
+        buffer_offset, channel_id.channel_index, starting_desc);
 }
 
 Expected<uint16_t> DescriptorList::program_last_descriptor(size_t transfer_size,
@@ -123,7 +124,7 @@ Expected<uint16_t> DescriptorList::program_last_descriptor(size_t transfer_size,
     auto resuide = transfer_size - (required_descriptors - 1) * m_desc_page_size;
     assert(IS_FIT_IN_UINT16(resuide));
     size_t last_desc = (desc_offset + required_descriptors - 1) % count();
-    program_single_descriptor((*this)[last_desc], static_cast<uint16_t>(resuide), last_desc_interrupts_domain);
+    program_single_descriptor(last_desc, static_cast<uint16_t>(resuide), last_desc_interrupts_domain);
 
     return std::move(static_cast<uint16_t>(required_descriptors));
 }
@@ -145,9 +146,9 @@ uint32_t DescriptorList::calculate_descriptors_count(uint32_t buffer_size, uint1
     // of descs given (Otherwise we won't be able to determine if the buffer is empty or full).
     // Therefore we add 1 in order to compensate.
     uint32_t descs_count = std::min(((descriptors_in_buffer(buffer_size, desc_page_size) * batch_size) + 1),
-        MAX_DESCS_COUNT);
+        MAX_SG_DESCS_COUNT);
 
-    return get_nearest_powerof_2(descs_count, MIN_DESCS_COUNT);
+    return get_nearest_powerof_2(descs_count, MIN_SG_DESCS_COUNT);
 }
 
 uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_domain)
@@ -179,9 +180,11 @@ uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_doma
     return bitmask;
 }
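A worked example of the residue computation in program_last_descriptor above (illustrative numbers):

```cpp
// transfer_size = 1000, m_desc_page_size = 512:
//   required_descriptors = DIV_ROUND_UP(1000, 512) = 2
//   residue = 1000 - (2 - 1) * 512 = 488   // only the last descriptor is partial
// The last descriptor is programmed with size 488 and the requested interrupt
// domain; all preceding descriptors keep their default full-page size.
static_assert(1000 - (2 - 1) * 512 == 488, "last-descriptor residue example");
```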
@@ -179,9 +180,11 @@ uint32_t DescriptorList::get_interrupts_bitmask(InterruptsDomain interrupts_doma return bitmask; } -void DescriptorList::program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, +void DescriptorList::program_single_descriptor(size_t desc_index, uint16_t page_size, InterruptsDomain interrupts_domain) { + auto &descriptor = (*this)[desc_index]; + // Update the descriptor's PAGE_SIZE field in the control register with the maximum size of the DMA page. // Make all edits to the local variable local_pagesize_desc_ctrl that is on the stack to save reads/writes to DDR auto local_pagesize_desc_ctrl = static_cast<uint32_t>(page_size << DESC_PAGE_SIZE_SHIFT) & DESC_PAGE_SIZE_MASK; @@ -203,11 +206,5 @@ void DescriptorList::program_single_descriptor(VdmaDescriptor &descriptor, uint1 #endif } -void DescriptorList::clear_descriptor(const size_t desc_index) -{ - // Clear previous descriptor properties - program_single_descriptor((*this)[desc_index], m_desc_page_size, InterruptsDomain::NONE); -} - } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp index de3715ef..7f8222a4 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp @@ -17,8 +17,8 @@ #include "vdma/channel/channel_id.hpp" #include "vdma/memory/mapped_buffer.hpp" +#include "vdma/driver/hailort_driver.hpp" -#include "os/hailort_driver.hpp" #include "os/mmap_buffer.hpp" @@ -26,14 +26,14 @@ namespace hailort { namespace vdma { -#define MAX_DESCS_COUNT (64 * 1024u) -#define MIN_DESCS_COUNT (2u) +#define MAX_SG_DESCS_COUNT (64 * 1024u) +#define MIN_SG_DESCS_COUNT (2u) #define DEFAULT_DESC_COUNT (64 * 1024u) -static_assert(is_powerof2(MAX_DESCS_COUNT), "MAX_DESCS_COUNT must be a power of 2"); -static_assert(is_powerof2(MIN_DESCS_COUNT), "MIN_DESCS_COUNT must be a power of 2"); +static_assert(is_powerof2(MAX_SG_DESCS_COUNT), "MAX_SG_DESCS_COUNT must be a power of 2"); +static_assert(is_powerof2(MIN_SG_DESCS_COUNT), "MIN_SG_DESCS_COUNT must be a power of 2"); static_assert(is_powerof2(DEFAULT_DESC_COUNT), "DEFAULT_DESC_COUNT must be a power of 2"); -static_assert(DEFAULT_DESC_COUNT <= MAX_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN_DESCS_COUNT, +static_assert(DEFAULT_DESC_COUNT <= MAX_SG_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN_SG_DESCS_COUNT, "DEFAULT_DESC_COUNT not in range"); // From PLDA's vDMA controller reference: @@ -42,16 +42,16 @@ static_assert(DEFAULT_DESC_COUNT <= MAX_DESCS_COUNT && DEFAULT_DESC_COUNT >= MIN // - G_PAGE_SIZE_MAX dictates the maximum desc page size: // max_page_size = 2 ^ (G_PAGE_SIZE_MAX - 1) // In our case max_page_size = 2 ^ (13 - 1) = 4096 -static constexpr uint16_t MIN_DESC_PAGE_SIZE = 64; -static constexpr uint16_t MAX_DESC_PAGE_SIZE = 4096; -static constexpr uint16_t DEFAULT_DESC_PAGE_SIZE = 512; +static constexpr uint16_t MIN_SG_PAGE_SIZE = 64; +static constexpr uint16_t MAX_SG_PAGE_SIZE = 4096; +static constexpr uint16_t DEFAULT_SG_PAGE_SIZE = 512; -static_assert(is_powerof2(MIN_DESC_PAGE_SIZE), "MIN_DESC_PAGE_SIZE must be a power of 2"); -static_assert(MIN_DESC_PAGE_SIZE > 0, "MIN_DESC_PAGE_SIZE must be larger then 0"); -static_assert(is_powerof2(MAX_DESC_PAGE_SIZE), "MAX_DESC_PAGE_SIZE must be a power of 2"); -static_assert(MAX_DESC_PAGE_SIZE > 0, "MAX_DESC_PAGE_SIZE must be larger then 0"); -static_assert(is_powerof2(DEFAULT_DESC_PAGE_SIZE), "DEFAULT_DESC_PAGE_SIZE must be a power of 2"); -static_assert(DEFAULT_DESC_PAGE_SIZE > 0, "DEFAULT_DESC_PAGE_SIZE must be larger then 0"); +static_assert(is_powerof2(MIN_SG_PAGE_SIZE), "MIN_SG_PAGE_SIZE must be a power of 2"); +static_assert(MIN_SG_PAGE_SIZE > 0, "MIN_SG_PAGE_SIZE must be larger than 0"); +static_assert(is_powerof2(MAX_SG_PAGE_SIZE), "MAX_SG_PAGE_SIZE must be a power of 2"); +static_assert(MAX_SG_PAGE_SIZE > 0, "MAX_SG_PAGE_SIZE must be larger than 0"); +static_assert(is_powerof2(DEFAULT_SG_PAGE_SIZE), "DEFAULT_SG_PAGE_SIZE must be a power of 2");
must be a power of 2"); +static_assert(DEFAULT_SG_PAGE_SIZE > 0, "DEFAULT_SG_PAGE_SIZE must be larger then 0"); static constexpr auto DESCRIPTOR_STATUS_MASK = 0xFF; @@ -87,25 +87,6 @@ struct VdmaDescriptor static_assert(SIZE_OF_SINGLE_DESCRIPTOR == sizeof(VdmaDescriptor), "Invalid size of descriptor"); -enum class InterruptsDomain -{ - NONE = 0, - DEVICE = 1 << 0, - HOST = 1 << 1, - BOTH = DEVICE | HOST -}; - -inline InterruptsDomain operator|(InterruptsDomain a, InterruptsDomain b) -{ - return static_cast(static_cast(a) | static_cast(b)); -} - -inline InterruptsDomain& operator|=(InterruptsDomain &a, InterruptsDomain b) -{ - a = a | b; - return a; -} - inline bool host_interuptes_enabled(InterruptsDomain interrupts_domain) { return 0 != (static_cast(interrupts_domain) & static_cast(InterruptsDomain::HOST)); @@ -164,14 +145,14 @@ class DescriptorList // Map descriptors starting at offset to the start of buffer, wrapping around the descriptor list as needed // On hailo8, we allow configuring buffer without specific channel index (default is INVALID_VDMA_CHANNEL_INDEX). - hailo_status configure_to_use_buffer(MappedBuffer& buffer, ChannelId channel_id, uint32_t starting_desc = 0); + hailo_status configure_to_use_buffer(MappedBuffer& buffer, size_t buffer_size, size_t buffer_offset, + ChannelId channel_id, uint32_t starting_desc = 0); // All descritors are initialized to have size of m_desc_page_size - so all we do is set the last descritor for the // Interrupt - and then after transfer has finished clear the previously used first and last decsriptors. // This saves us write/ reads to the desscriptor list which is DMA memory. Expected program_last_descriptor(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, size_t desc_offset); - void program_single_descriptor(VdmaDescriptor &descriptor, uint16_t page_size, InterruptsDomain interrupts_domain); - void clear_descriptor(const size_t desc_index); + void program_single_descriptor(size_t desc_index, uint16_t page_size, InterruptsDomain interrupts_domain); uint32_t descriptors_in_buffer(size_t buffer_size) const; static uint32_t descriptors_in_buffer(size_t buffer_size, uint16_t desc_page_size); diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index a03b2e04..e76860f5 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -24,8 +24,6 @@ namespace hailort { namespace vdma { -#if defined(__linux__) || defined(_MSC_VER) - // User buffer. This class does not own the buffer. 
diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index a03b2e04..e76860f5 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -24,8 +24,6 @@ namespace hailort { namespace vdma { -#if defined(__linux__) || defined(_MSC_VER) - // User buffer. This class does not own the buffer. class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { public: @@ -52,13 +50,14 @@ class UserAllocatedDmaAbleBuffer : public DmaAbleBuffer { virtual size_t size() const override { return m_size; } virtual void *user_address() override { return m_user_address; } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: const size_t m_size; void *m_user_address; }; +#if defined(__linux__) || defined(_MSC_VER) #if defined(__linux__) class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { @@ -80,7 +79,7 @@ class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { virtual void* user_address() override { return m_mmapped_buffer.address(); } virtual size_t size() const override { return m_mmapped_buffer.size(); } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: // Using mmap instead of aligned_alloc to enable MEM_SHARE flag - used for multi-process fork. @@ -106,7 +105,7 @@ class PageAlignedDmaAbleBuffer : public DmaAbleBuffer { virtual size_t size() const override { return m_memory_guard.size(); } virtual void *user_address() override { return m_memory_guard.address(); } - virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_DRIVER_BUFFER_HANDLE_VALUE; } + virtual vdma_mapped_buffer_driver_identifier buffer_identifier() override { return HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; } private: VirtualAllocGuard m_memory_guard; @@ -252,11 +251,9 @@ class SharedMemoryDmaAbleBuffer : public DmaAbleBuffer { MmapBuffer m_mmapped_buffer; }; -Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_from_user_address(void */* user_address */, size_t /* size */) +Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_from_user_address(void *user_address, size_t size) { - LOGGER__ERROR("Mapping user address is not supported on QNX"); - - return make_unexpected(HAILO_NOT_SUPPORTED); + return UserAllocatedDmaAbleBuffer::create(user_address, size); } Expected<DmaAbleBufferPtr> DmaAbleBuffer::create_by_allocation(size_t size) diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp index 0123e62b..79f56aa6 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp @@ -19,7 +19,7 @@ #define _HAILO_DMA_ABLE_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "os/mmap_buffer.hpp" namespace hailort {
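The page-aligned variant above uses mmap rather than aligned_alloc so the pages can be shared across fork(). A minimal Linux-only sketch of that idea (not HailoRT code; sizes are illustrative):

    #include <cstddef>
    #include <cstdio>
    #include <sys/mman.h>
    #include <unistd.h>

    int main() {
        const size_t page = static_cast<size_t>(sysconf(_SC_PAGESIZE));
        const size_t size = ((100 * 1024 + page - 1) / page) * page; // round up to whole pages
        // MAP_SHARED | MAP_ANONYMOUS keeps the pages visible to forked children,
        // which aligned_alloc cannot provide.
        void *mem = mmap(nullptr, size, PROT_READ | PROT_WRITE,
                         MAP_SHARED | MAP_ANONYMOUS, -1, 0);
        if (mem == MAP_FAILED) { perror("mmap"); return 1; }
        // The buffer is page aligned, so a driver can map it for DMA as-is.
        munmap(mem, size);
        return 0;
    }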
diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp index 884c15e4..1c2e8fae 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp @@ -18,11 +18,11 @@ * so we need to allocate the pages in driver. **/ -#ifndef _HAILO_DMA_MAPPED_BUFFER_HPP_ -#define _HAILO_DMA_MAPPED_BUFFER_HPP_ +#ifndef _HAILO_VDMA_MAPPED_BUFFER_HPP_ +#define _HAILO_VDMA_MAPPED_BUFFER_HPP_ #include "hailo/expected.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/dma_able_buffer.hpp" #include <memory> @@ -98,4 +98,4 @@ class MappedBuffer final } /* namespace vdma */ } /* namespace hailort */ -#endif /* _HAILO_DMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file +#endif /* _HAILO_VDMA_MAPPED_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.cpp b/hailort/libhailort/src/vdma/memory/mapping_manager.cpp deleted file mode 100644 index ba2a21d0..00000000 --- a/hailort/libhailort/src/vdma/memory/mapping_manager.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapping_manager.cpp - * @brief DMA mapping registry on a given device - **/ - -#include "mapping_manager.hpp" -#include "hailo/hailort.h" - -namespace hailort { -namespace vdma { - -MappingManager::MappingManager(HailoRTDriver &driver) : - m_driver(driver), - m_mutex(), - m_h2d_mappings(), - m_d2h_mappings() -{} - -hailo_status MappingManager::map_buffer(void *address, size_t size, hailo_stream_direction_t direction) -{ - static const auto CREATE_DMAABLE_BUFFER = nullptr; - auto mapping_result = try_dma_map(CREATE_DMAABLE_BUFFER, address, size, direction); - CHECK_EXPECTED_AS_STATUS(mapping_result); - - const auto new_mapping = mapping_result->second; - return new_mapping ? HAILO_SUCCESS : HAILO_DMA_MAPPING_ALREADY_EXISTS; -} - -hailo_status MappingManager::unmap_buffer(void *address, hailo_stream_direction_t direction) -{ - auto &mappings = get_mapping_storage(direction); - std::lock_guard<std::mutex> lock_guard(m_mutex); - auto it = mappings.find(address); - if (it == mappings.end()) { - LOGGER__TRACE("Buffer {} not mapped in direction {}", address, direction); - return HAILO_NOT_FOUND; - } - - mappings.erase(it); - return HAILO_SUCCESS; -} - -Expected<std::pair<MappedBufferPtr, bool>> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) -{ - CHECK_ARG_NOT_NULL_AS_EXPECTED(buffer); - - return try_dma_map(buffer, buffer->user_address(), buffer->size(), direction); -} - -Expected<std::pair<MappedBufferPtr, bool>> MappingManager::try_dma_map(DmaAbleBufferPtr buffer, - void *address, size_t size, hailo_stream_direction_t direction) -{ - assert((nullptr == buffer) || ((buffer->user_address() == address) && (buffer->size() == size))); - CHECK_ARG_NOT_NULL_AS_EXPECTED(address); - CHECK_AS_EXPECTED(0 < size, HAILO_INVALID_ARGUMENT); - CHECK_AS_EXPECTED(HAILO_STREAM_DIRECTION_MAX_ENUM > direction, HAILO_INVALID_ARGUMENT); - - auto &mappings = get_mapping_storage(direction); - std::lock_guard<std::mutex> lock_guard(m_mutex); - if (mappings.end() != mappings.find(address)) { - // Mapping exists - return std::make_pair(mappings[address], false); - } - - // New mapping - if (nullptr == buffer) { - // We only want to create a dma-able buffer if the address hasn't been mapped and we haven't gotten - // a dma-able buffer from the user - auto buffer_exp = DmaAbleBuffer::create_from_user_address(address, size); - CHECK_EXPECTED(buffer_exp); - buffer = buffer_exp.release(); - } - - const auto data_direction = (direction == HAILO_H2D_STREAM) ?
- HailoRTDriver::DmaDirection::H2D : - HailoRTDriver::DmaDirection::D2H; - auto mapped_buffer = MappedBuffer::create_shared(buffer, m_driver, data_direction); - CHECK_EXPECTED(mapped_buffer); - - mappings[address] = mapped_buffer.release(); - - return std::make_pair(mappings[address], true); -} - -std::unordered_map<void *, MappedBufferPtr> &MappingManager::get_mapping_storage(hailo_stream_direction_t direction) -{ - // No point in failing if direction is invalid (i.e. HAILO_STREAM_DIRECTION_MAX_ENUM), - // because the direction is checked before mappings are added (see try_dma_map). So an invalid direction - // will result in the mapping not being found - return (direction == HAILO_H2D_STREAM) ? m_h2d_mappings : m_d2h_mappings; -} - -} /* namespace vdma */ -} /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/mapping_manager.hpp b/hailort/libhailort/src/vdma/memory/mapping_manager.hpp deleted file mode 100644 index a211f9df..00000000 --- a/hailort/libhailort/src/vdma/memory/mapping_manager.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file mapping_manager.hpp - * @brief DMA mapping registry on a given device - **/ - -#ifndef _HAILO_MAPPING_MANAGER_HPP_ -#define _HAILO_MAPPING_MANAGER_HPP_ - -#include "hailo/hailort.h" -#include "vdma/memory/mapped_buffer.hpp" -#include "os/hailort_driver.hpp" - -#include <mutex> -#include <unordered_map> -#include <utility> - -namespace hailort { -namespace vdma { - -class MappingManager final -{ -public: - MappingManager(HailoRTDriver &driver); - MappingManager(MappingManager &&) = delete; - MappingManager(const MappingManager &) = delete; - MappingManager &operator=(MappingManager &&) = delete; - MappingManager &operator=(const MappingManager &) = delete; - ~MappingManager() = default; - - hailo_status map_buffer(void *address, size_t size, hailo_stream_direction_t direction); - hailo_status unmap_buffer(void *address, hailo_stream_direction_t direction); - // Returns (MappedBufferPtr, true) if the mapping is new - // Returns (MappedBufferPtr, false) if the mapping is pre-existing - Expected<std::pair<MappedBufferPtr, bool>> try_dma_map(DmaAbleBufferPtr buffer, hailo_stream_direction_t direction); - -private: - inline std::unordered_map<void *, MappedBufferPtr> &get_mapping_storage(hailo_stream_direction_t direction); - Expected<std::pair<MappedBufferPtr, bool>> try_dma_map(DmaAbleBufferPtr buffer, void *address, size_t size, - hailo_stream_direction_t direction); - - HailoRTDriver &m_driver; - std::mutex m_mutex; - std::unordered_map<void *, MappedBufferPtr> m_h2d_mappings; - std::unordered_map<void *, MappedBufferPtr> m_d2h_mappings; -}; - -} /* namespace vdma */ -} /* namespace hailort */ - -#endif /* _HAILO_mapping_manager_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp index 5ef0132b..50fe63e6 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp @@ -3,45 +3,26 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file vdma_sg_buffer.cpp + * @file sg_buffer.cpp * @brief Scatter-gather vdma buffer.
**/ #include "vdma/memory/sg_buffer.hpp" -#include "vdma/channel/channel_id.hpp" namespace hailort { namespace vdma { -Expected<SgBuffer> SgBuffer::create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - bool is_circular, HailoRTDriver::DmaDirection data_direction, ChannelId channel_id) +Expected<SgBuffer> SgBuffer::create(HailoRTDriver &driver, size_t size, HailoRTDriver::DmaDirection data_direction) { - CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE, - "Requested buffer size {} must be smaller than {}", size, (desc_count * desc_page_size)); - CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, - "SgBuffer size must be a multiple of descriptors page size (size {})", size); - auto mapped_buffer = MappedBuffer::create_shared_by_allocation(size, driver, data_direction); CHECK_EXPECTED(mapped_buffer); - auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); - CHECK_EXPECTED(desc_list_exp); - - auto desc_list = make_shared_nothrow<DescriptorList>(desc_list_exp.release()); - CHECK_NOT_NULL_AS_EXPECTED(desc_list, HAILO_OUT_OF_HOST_MEMORY); - - assert((desc_count * desc_page_size) <= std::numeric_limits<uint32_t>::max()); - - auto status = desc_list->configure_to_use_buffer(*mapped_buffer.value(), channel_id); - CHECK_SUCCESS_AS_EXPECTED(status); - - return SgBuffer(mapped_buffer.release(), desc_list); + return SgBuffer(mapped_buffer.release()); } -SgBuffer::SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list) : - m_mapped_buffer(mapped_buffer), - m_desc_list(desc_list) +SgBuffer::SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer) : - wait - m_mapped_buffer(mapped_buffer) {} size_t SgBuffer::size() const @@ -49,21 +30,6 @@ size_t SgBuffer::size() const return m_mapped_buffer->size(); } -uint64_t SgBuffer::dma_address() const -{ - return m_desc_list->dma_address(); -} - -uint16_t SgBuffer::desc_page_size() const -{ - return m_desc_list->desc_page_size(); -} - -uint32_t SgBuffer::descs_count() const -{ - return static_cast<uint32_t>(m_desc_list->count()); -} - hailo_status SgBuffer::read(void *buf_dst, size_t count, size_t offset) { return m_mapped_buffer->read(buf_dst, count, offset); } @@ -73,10 +39,9 @@ hailo_status SgBuffer::write(const void *buf_src, size_t count, size_t offset) { return m_mapped_buffer->write(buf_src, count, offset); } -Expected<uint32_t> SgBuffer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) +std::shared_ptr<MappedBuffer> SgBuffer::get_mapped_buffer() { - return m_desc_list->program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset); + return m_mapped_buffer; } }
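The net effect of this split is that SgBuffer now only owns mapped memory, while every descriptor-list concern moves to SgEdgeLayer, which holds a shared pointer to the buffer plus an offset window into it. A standalone toy model of that ownership shape (not HailoRT code; all names here are illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <memory>
    #include <vector>

    // The "buffer" only owns memory, like the slimmed-down SgBuffer.
    struct ToyBuffer {
        std::vector<uint8_t> data;
        explicit ToyBuffer(size_t size) : data(size) {}
    };

    // The "edge layer" is a sized window at an offset into a shared buffer.
    class ToyEdgeLayer {
    public:
        ToyEdgeLayer(std::shared_ptr<ToyBuffer> buffer, size_t size, size_t offset)
            : m_buffer(std::move(buffer)), m_size(size), m_offset(offset) {
            assert(m_offset + m_size <= m_buffer->data.size()); // window fully inside buffer
        }
        void write(const void *src, size_t count, size_t offset) {
            assert(offset + count <= m_size);
            std::memcpy(m_buffer->data.data() + m_offset + offset, src, count);
        }
    private:
        std::shared_ptr<ToyBuffer> m_buffer;
        size_t m_size, m_offset;
    };

    int main() {
        auto backing = std::make_shared<ToyBuffer>(1024);
        ToyEdgeLayer layer(backing, 512, 256); // a 512-byte window at offset 256
        const char msg[] = "hi";
        layer.write(msg, sizeof(msg), 0);      // lands at byte 256 of the backing buffer
        assert(backing->data[256] == 'h');
        return 0;
    }

Several edge layers can share one backing buffer this way, which is what makes the offset parameter in SgEdgeLayer::create below useful.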
diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp index 38c6d45f..e7b2acd8 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp @@ -6,16 +6,12 @@ * @file sg_buffer.hpp * @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous, * but not from the physical-memory point of view. - * The sg buffer contains 2 parts: - * - MappedBuffer - the actual buffer stores the data. - * - Descriptors list - each descritpor points to a single "dma page" in the MappedBuffer. - * The hw accept the descriptors list address and parses it to get the actual data. **/ #ifndef _HAILO_VDMA_SG_BUFFER_HPP_ #define _HAILO_VDMA_SG_BUFFER_HPP_ -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/vdma_buffer.hpp" #include "vdma/memory/descriptor_list.hpp" #include "vdma/memory/mapped_buffer.hpp" @@ -26,8 +22,7 @@ namespace vdma { class SgBuffer final : public VdmaBuffer { public: - static Expected<SgBuffer> create(HailoRTDriver &driver, size_t size, uint32_t desc_count, uint16_t desc_page_size, - bool is_circular, HailoRTDriver::DmaDirection data_direction, vdma::ChannelId channel_id); + static Expected<SgBuffer> create(HailoRTDriver &driver, size_t size, HailoRTDriver::DmaDirection data_direction); virtual ~SgBuffer() = default; @@ -42,22 +37,14 @@ class SgBuffer final : public VdmaBuffer { } virtual size_t size() const override; - virtual uint64_t dma_address() const override; - virtual uint16_t desc_page_size() const override; - virtual uint32_t descs_count() const override; - virtual hailo_status read(void *buf_dst, size_t count, size_t offset) override; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) override; - - virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) override; + std::shared_ptr<MappedBuffer> get_mapped_buffer(); private: - SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer, std::shared_ptr<DescriptorList> desc_list); + SgBuffer(std::shared_ptr<MappedBuffer> mapped_buffer); - // Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it std::shared_ptr<MappedBuffer> m_mapped_buffer; - std::shared_ptr<DescriptorList> m_desc_list; }; } /* vdma */ diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp new file mode 100644 index 00000000..371f52ba --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp @@ -0,0 +1,70 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file sg_edge_layer.cpp + * @brief Scatter-gather vdma edge layer. + **/ + +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/channel/channel_id.hpp" + + +namespace hailort { +namespace vdma { + +Expected<SgEdgeLayer> SgEdgeLayer::create(std::shared_ptr<SgBuffer> &&buffer, size_t size, size_t offset, + HailoRTDriver &driver, uint32_t desc_count, uint16_t desc_page_size, bool is_circular, ChannelId channel_id) +{ + CHECK_AS_EXPECTED(size <= (desc_count * desc_page_size), HAILO_INTERNAL_FAILURE, + "Requested buffer size {} must be smaller or equal to {}", size, (desc_count * desc_page_size)); + CHECK_AS_EXPECTED((size % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, + "SgEdgeLayer size must be a multiple of descriptors page size (size {})", size); + CHECK_AS_EXPECTED((offset % desc_page_size) == 0, HAILO_INTERNAL_FAILURE, + "SgEdgeLayer offset must be a multiple of descriptors page size (offset {}. Page size {})", offset, desc_page_size); + + CHECK_AS_EXPECTED(buffer->size() >= (offset + size), HAILO_INTERNAL_FAILURE, + "Edge layer is not fully inside the connected buffer.
buffer size is {} while edge layer offset {} and size {}", + buffer->size(), offset, size); + + auto desc_list_exp = DescriptorList::create(desc_count, desc_page_size, is_circular, driver); + CHECK_EXPECTED(desc_list_exp); + + assert((desc_count * desc_page_size) <= std::numeric_limits<uint32_t>::max()); + + auto status = desc_list_exp->configure_to_use_buffer(*(buffer->get_mapped_buffer()), size, offset, channel_id); + CHECK_SUCCESS_AS_EXPECTED(status); + + return SgEdgeLayer(std::move(buffer), desc_list_exp.release(), size, offset); +} + +SgEdgeLayer::SgEdgeLayer(std::shared_ptr<SgBuffer> &&buffer, DescriptorList &&desc_list, + size_t size, size_t offset) : + VdmaEdgeLayer(std::move(buffer), size, offset), + m_desc_list(std::move(desc_list)) +{} + +uint64_t SgEdgeLayer::dma_address() const +{ + return m_desc_list.dma_address(); +} + +uint16_t SgEdgeLayer::desc_page_size() const +{ + return m_desc_list.desc_page_size(); +} + +uint32_t SgEdgeLayer::descs_count() const +{ + return static_cast<uint32_t>(m_desc_list.count()); +} + +Expected<uint32_t> SgEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) +{ + return m_desc_list.program_last_descriptor(transfer_size, last_desc_interrupts_domain, desc_offset); +} + +} +} \ No newline at end of file
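The alignment rules that create() enforces above have a simple consequence: because both size and offset are multiples of the descriptor page size, an offset always maps to a whole starting descriptor. A standalone check of that arithmetic (not HailoRT code; values are illustrative):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uint16_t page_size = 512;
        const size_t offset = 4096;      // multiple of page_size -> valid
        const size_t size = 512 * 100;   // multiple of page_size -> valid
        assert(offset % page_size == 0 && size % page_size == 0);
        const size_t first_desc = offset / page_size; // descriptor 8 covers the window start
        const size_t desc_count = size / page_size;   // 100 descriptors span the window
        assert(first_desc == 8 && desc_count == 100);
        return 0;
    }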
diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp new file mode 100644 index 00000000..bd9716cc --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp @@ -0,0 +1,64 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file sg_edge_layer.hpp + * @brief Scatter-gather vdma buffer, from the user-mode point of view the buffer is continuous, + * but not from the physical-memory point of view. + * The sg buffer contains 2 parts: + * - MappedBuffer - the actual buffer stores the data. + * - Descriptors list - each descriptor points to a single "dma page" in the MappedBuffer. + * The hw accepts the descriptors list address and parses it to get the actual data. + **/ + +#ifndef _HAILO_VDMA_SG_EDGE_LAYER_HPP_ +#define _HAILO_VDMA_SG_EDGE_LAYER_HPP_ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" +#include "vdma/memory/sg_buffer.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "vdma/memory/mapped_buffer.hpp" + + +namespace hailort { +namespace vdma { + +class SgEdgeLayer final : public VdmaEdgeLayer { +public: + static Expected<SgEdgeLayer> create(std::shared_ptr<SgBuffer> &&buffer, size_t size, size_t offset, + HailoRTDriver &driver, uint32_t desc_count, uint16_t desc_page_size, bool is_circular, ChannelId channel_id); + + virtual ~SgEdgeLayer() = default; + + SgEdgeLayer(const SgEdgeLayer &) = delete; + SgEdgeLayer(SgEdgeLayer &&) = default; + SgEdgeLayer& operator=(const SgEdgeLayer &) = delete; + SgEdgeLayer& operator=(SgEdgeLayer &&) = delete; + + virtual Type type() const override + { + return Type::SCATTER_GATHER; + } + + virtual uint64_t dma_address() const override; + virtual uint16_t desc_page_size() const override; + virtual uint32_t descs_count() const override; + + virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) override; + +private: + SgEdgeLayer(std::shared_ptr<SgBuffer> &&buffer, DescriptorList &&desc_list, + size_t size, size_t offset); + + // Initialization Dependency: The descriptor list points into the mapped buffer so it must be freed before it + std::shared_ptr<SgBuffer> m_buffer; + DescriptorList m_desc_list; +}; + +} /* vdma */ +} /* hailort */ + +#endif /* _HAILO_VDMA_SG_EDGE_LAYER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp index 97a00d90..ccb0e024 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp @@ -13,25 +13,5 @@ namespace hailort { namespace vdma { -CONTROL_PROTOCOL__host_buffer_info_t VdmaBuffer::get_host_buffer_info(uint32_t transfer_size) -{ - return get_host_buffer_info(type(), dma_address(), desc_page_size(), descs_count(), transfer_size); -} - -CONTROL_PROTOCOL__host_buffer_info_t VdmaBuffer::get_host_buffer_info(Type type, uint64_t dma_address, - uint16_t desc_page_size, uint32_t desc_count, uint32_t transfer_size) -{ - CONTROL_PROTOCOL__host_buffer_info_t buffer_info{}; - buffer_info.buffer_type = static_cast<uint8_t>((type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ? - CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : - CONTROL_PROTOCOL__HOST_BUFFER_TYPE_CCB); - buffer_info.dma_address = dma_address; - buffer_info.desc_page_size = desc_page_size; - buffer_info.total_desc_count = desc_count; - buffer_info.bytes_in_pattern = transfer_size; - - return buffer_info; -} - } } \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp index 97e6e75d..763c4c55 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp @@ -3,15 +3,15 @@ * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** - * @file vdma_buffer.hpp - * @brief Abstract layer representing a vdma buffer (buffer that can be read/written to the device over vdma.) - * The buffer can be either non-continuous with attach descriptors list (SgBuffer) or continuous buffer. + * @file vdma_edge_layer.hpp + * @brief Abstract layer representing a vdma edge layer (buffer that can be read/written to the device over vdma.)
+ * The buffer can be either non-continuous with attached descriptors list (SgEdgeLayer) or continuous buffer. **/ #ifndef _HAILO_VDMA_VDMA_BUFFER_HPP_ #define _HAILO_VDMA_VDMA_BUFFER_HPP_ -#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "vdma/memory/descriptor_list.hpp" #include "control_protocol.h" @@ -37,26 +37,8 @@ class VdmaBuffer { virtual Type type() const = 0; virtual size_t size() const = 0; - virtual uint64_t dma_address() const = 0; - virtual uint16_t desc_page_size() const = 0; - virtual uint32_t descs_count() const = 0; - - uint32_t descriptors_in_buffer(size_t buffer_size) const - { - assert(buffer_size < std::numeric_limits<uint32_t>::max()); - const auto page_size = desc_page_size(); - return static_cast<uint32_t>(DIV_ROUND_UP(buffer_size, page_size)); - } - virtual hailo_status read(void *buf_dst, size_t count, size_t offset) = 0; virtual hailo_status write(const void *buf_src, size_t count, size_t offset) = 0; - - virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset) = 0; - - CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(uint32_t transfer_size); - static CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(Type type, uint64_t dma_address, - uint16_t desc_page_size, uint32_t total_desc_count, uint32_t transfer_size); }; } /* vdma */ diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp new file mode 100644 index 00000000..65f3425d --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp @@ -0,0 +1,52 @@ +/** + * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file vdma_edge_layer.cpp + * @brief vdma edge layer. + **/ + +#include "vdma_edge_layer.hpp" +#include "control_protocol.h" + +namespace hailort { +namespace vdma { + +VdmaEdgeLayer::VdmaEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, const size_t size, const size_t offset) : + m_buffer(std::move(buffer)), + m_size(size), + m_offset(offset) +{} + +CONTROL_PROTOCOL__host_buffer_info_t VdmaEdgeLayer::get_host_buffer_info(uint32_t transfer_size) +{ + return get_host_buffer_info(type(), dma_address(), desc_page_size(), descs_count(), transfer_size); +} + +CONTROL_PROTOCOL__host_buffer_info_t VdmaEdgeLayer::get_host_buffer_info(Type type, uint64_t dma_address, + uint16_t desc_page_size, uint32_t desc_count, uint32_t transfer_size) +{ + CONTROL_PROTOCOL__host_buffer_info_t buffer_info{}; + buffer_info.buffer_type = static_cast<uint8_t>((type == vdma::VdmaEdgeLayer::Type::SCATTER_GATHER) ?
+ CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : + CONTROL_PROTOCOL__HOST_BUFFER_TYPE_CCB); + buffer_info.dma_address = dma_address; + buffer_info.desc_page_size = desc_page_size; + buffer_info.total_desc_count = desc_count; + buffer_info.bytes_in_pattern = transfer_size; + + return buffer_info; +} + +hailo_status VdmaEdgeLayer::read(void *buf_dst, size_t count, size_t offset) +{ + return m_buffer->read(buf_dst, count, m_offset + offset); +} +hailo_status VdmaEdgeLayer::write(const void *buf_src, size_t count, size_t offset) +{ + return m_buffer->write(buf_src, count, m_offset + offset); +} + +} +} \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp new file mode 100644 index 00000000..8814f260 --- /dev/null +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp @@ -0,0 +1,74 @@ +/** + * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file vdma_edge_layer.hpp + * @brief Abstract layer representing a vdma edge layer (buffer that can be read/written to the device over vdma.) + * The buffer can be either non-continuous with attached descriptors list (SgEdgeLayer) or continuous buffer. + **/ + +#ifndef _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ +#define _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "control_protocol.h" +#include "vdma/memory/vdma_buffer.hpp" + +namespace hailort { +namespace vdma { + +class VdmaEdgeLayer { +public: + + enum class Type { + SCATTER_GATHER, + CONTINUOUS + }; + + virtual ~VdmaEdgeLayer() = default; + + VdmaEdgeLayer(const VdmaEdgeLayer &) = delete; + VdmaEdgeLayer(VdmaEdgeLayer &&) = default; + VdmaEdgeLayer& operator=(const VdmaEdgeLayer &) = delete; + VdmaEdgeLayer& operator=(VdmaEdgeLayer &&) = delete; + + virtual Type type() const = 0; + virtual uint64_t dma_address() const = 0; + virtual uint16_t desc_page_size() const = 0; + virtual uint32_t descs_count() const = 0; + + size_t size() const + { + return m_size; + } + + uint32_t descriptors_in_buffer(size_t buffer_size) const + { + assert(buffer_size < std::numeric_limits<uint32_t>::max()); + const auto page_size = desc_page_size(); + return static_cast<uint32_t>(DIV_ROUND_UP(buffer_size, page_size)); + } + + hailo_status read(void *buf_dst, size_t count, size_t offset); + hailo_status write(const void *buf_src, size_t count, size_t offset); + + virtual Expected<uint32_t> program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, + size_t desc_offset) = 0; + + CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(uint32_t transfer_size); + static CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(Type type, uint64_t dma_address, + uint16_t desc_page_size, uint32_t total_desc_count, uint32_t transfer_size); +protected: + VdmaEdgeLayer(std::shared_ptr<VdmaBuffer> &&buffer, const size_t size, const size_t offset); + + std::shared_ptr<VdmaBuffer> m_buffer; + const size_t m_size; + const size_t m_offset; +}; + +} /* vdma */ +} /* hailort */ + +#endif /* _HAILO_VDMA_VDMA_EDGE_LAYER_HPP_ */ diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp index e8a7075f..b8bb5374 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp @@ -18,7 +18,7 @@ #include "vdma/pcie/pcie_device.hpp" #include "device_common/control.hpp"
-#include "os/hailort_driver.hpp" +#include "vdma/driver/hailort_driver.hpp" #include "core_op/resource_manager/resource_manager.hpp" #include "vdma/vdma_config_manager.hpp" diff --git a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp index 90eea950..b19b2e4e 100644 --- a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp @@ -39,7 +39,7 @@ Expected VdmaConfigActivatedCoreOp::create( VdmaConfigActivatedCoreOp object(core_op_name, network_group_params, dynamic_batch_size, input_streams, output_streams, std::move(resources_manager), active_core_op_holder, std::move(core_op_activated_event), deactivation_time_accumulator, core_op, status); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { return make_unexpected(status); } CHECK_SUCCESS_AS_EXPECTED(status); @@ -74,7 +74,7 @@ VdmaConfigActivatedCoreOp::VdmaConfigActivatedCoreOp( // We know core_op is a VdmaConfigCoreOp status = core_op.activate_impl(dynamic_batch_size); - if (HAILO_STREAM_ABORTED_BY_USER == status) { + if (HAILO_STREAM_ABORT == status) { LOGGER__INFO("Core-op activation failed because it was aborted by user"); return; } diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp index 1e363b13..4afe078b 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp @@ -56,6 +56,7 @@ hailo_status VdmaConfigCoreOp::cancel_pending_transfers() hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size) { auto status = HAILO_UNINITIALIZED; + auto start_time = std::chrono::steady_clock::now(); if (CONTROL_PROTOCOL__IGNORE_DYNAMIC_BATCH_SIZE != dynamic_batch_size) { CHECK(dynamic_batch_size <= get_smallest_configured_batch_size(get_config_params()), @@ -66,25 +67,20 @@ hailo_status VdmaConfigCoreOp::activate_impl(uint16_t dynamic_batch_size) status = m_resources_manager->enable_state_machine(dynamic_batch_size); CHECK_SUCCESS(status, "Failed to activate state-machine"); - status = m_resources_manager->start_vdma_interrupts_dispatcher(); - CHECK_SUCCESS(status, "Failed to start vdma interrupts"); + CHECK_SUCCESS(activate_host_resources(), "Failed to activate host resources"); - // Low-level streams assume that the vdma channels are enabled (happens in `enable_state_machine`), and that - // the interrupt dispatcher is running (so they can wait for interrupts). 
- status = activate_low_level_streams(); - if (HAILO_STREAM_ABORTED_BY_USER == status) { - LOGGER__INFO("Low level streams activation failed because some were aborted by user"); - return status; - } - CHECK_SUCCESS(status, "Failed to activate low level streams"); - - TRACE(SwitchCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle()); + //TODO: HRT-13019 - Unite with the calculation in core_op.cpp + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(ActivateCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle(), elapsed_time_ms); return HAILO_SUCCESS; } hailo_status VdmaConfigCoreOp::deactivate_impl() { + auto start_time = std::chrono::steady_clock::now(); + auto status = deactivate_host_resources(); CHECK_SUCCESS(status); @@ -96,6 +92,11 @@ hailo_status VdmaConfigCoreOp::deactivate_impl() status = cancel_pending_transfers(); CHECK_SUCCESS(status, "Failed to cancel pending transfers"); + //TODO: HRT-13019 - Unite with the calculation in core_op.cpp + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(DeactivateCoreOpTrace, std::string(m_resources_manager->get_dev_id()), vdevice_core_op_handle(), elapsed_time_ms); + return HAILO_SUCCESS; } @@ -120,15 +121,19 @@ hailo_status VdmaConfigCoreOp::shutdown() return status; } -hailo_status VdmaConfigCoreOp::deactivate_host_resources() +hailo_status VdmaConfigCoreOp::activate_host_resources() { - auto status = deactivate_low_level_streams(); - CHECK_SUCCESS(status, "Failed to deactivate low level streams"); - - // After disabling the vdma interrupts, we may still get some interrupts. On HRT-9430 we need to clean them. - status = m_resources_manager->stop_vdma_interrupts_dispatcher(); - CHECK_SUCCESS(status, "Failed to stop vdma interrupts"); + CHECK_SUCCESS(m_resources_manager->start_vdma_transfer_launcher(), "Failed to start vdma transfer launcher"); + CHECK_SUCCESS(m_resources_manager->start_vdma_interrupts_dispatcher(), "Failed to start vdma interrupts"); + CHECK_SUCCESS(activate_low_level_streams(), "Failed to activate low level streams"); + return HAILO_SUCCESS; +} +hailo_status VdmaConfigCoreOp::deactivate_host_resources() +{ + CHECK_SUCCESS(deactivate_low_level_streams(), "Failed to deactivate low level streams"); + CHECK_SUCCESS(m_resources_manager->stop_vdma_interrupts_dispatcher(), "Failed to stop vdma interrupts"); + CHECK_SUCCESS(m_resources_manager->stop_vdma_transfer_launcher(), "Failed to stop vdma transfers pending launch"); return HAILO_SUCCESS; }
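The elapsed-time expressions in the hunks above lost their template arguments in extraction; they are reconstructed here as std::chrono::duration<double, std::milli>, an assumption that matches the .count() usage and the _ms naming. A standalone sketch of the pattern (the sleep stands in for activation work):

    #include <chrono>
    #include <cstdio>
    #include <thread>

    int main() {
        const auto start_time = std::chrono::steady_clock::now();
        std::this_thread::sleep_for(std::chrono::milliseconds(5)); // placeholder workload
        // duration<double, std::milli> keeps fractional milliseconds instead of truncating.
        const auto elapsed_time_ms = std::chrono::duration<double, std::milli>(
            std::chrono::steady_clock::now() - start_time).count();
        printf("took %.3f ms\n", elapsed_time_ms);
        return 0;
    }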
diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp index f923e091..42f60f99 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp @@ -49,8 +49,13 @@ class VdmaConfigCoreOp : public CoreOp // Will first deactivate host resources (via deactivate_host_resources) and then reset the core-op on the fw virtual hailo_status deactivate_impl() override; virtual hailo_status shutdown() override; + + // Activate all resources related to the core-op on the host. + hailo_status activate_host_resources(); + // Deactivate all resources related to the core-op on the host, but without resetting the core-op on the fw hailo_status deactivate_host_resources(); + hailo_status cancel_pending_transfers(); virtual Expected<hailo_stream_interface_t> get_default_streams_interface() override; diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp index 73c650f4..753d34f1 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.cpp @@ -8,58 +8,94 @@ **/ #include "vdma_config_manager.hpp" -#include "hailo/hailort.h" +#include "utils/profiler/tracer_macros.hpp" namespace hailort { -hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op, - std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool is_batch_switch) + +hailo_status VdmaConfigManager::set_core_op(const std::string &device_id, std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, const uint16_t batch_size) { - CHECK((nullptr != current_active_core_op) || (nullptr != next_core_op), HAILO_INVALID_ARGUMENT); - - if (nullptr == current_active_core_op) { - // Activate first core-op - return next_core_op->activate_impl(batch_size); - } else if (nullptr == next_core_op) { - // Deactivate last core-op - return current_active_core_op->deactivate_impl(); - } else if (is_batch_switch) { - auto status = current_active_core_op->get_resources_manager()->enable_state_machine(batch_size); - CHECK_SUCCESS(status, "Failed to activate state-machine"); + CHECK((nullptr != current) || (nullptr != next), HAILO_INVALID_ARGUMENT); + + const auto start_time = std::chrono::steady_clock::now(); + + const bool is_batch_switch = (current == next) && current->get_resources_manager()->get_can_fast_batch_switch(); + if (is_batch_switch) { + CHECK_SUCCESS(fast_batch_switch(current, batch_size), "Failed to fast batch switch"); } else { - // We're switching from current_active_core_op to next_core_op. - // Deactivate the current core-op on the host, meaning the fw state machine won't be reset. - // This will be handled by activating the next core-op. - auto status = current_active_core_op->deactivate_host_resources(); - CHECK_SUCCESS(status, "Failed deactivating current core-op"); + CHECK_SUCCESS(switch_core_op(current, next, batch_size), "Failed to switch core-op"); + } + + const auto core_op_handle = next ? next->vdevice_core_op_handle() : INVALID_CORE_OP_HANDLE; + const auto elapsed_time_ms = std::chrono::duration<double, std::milli>( + std::chrono::steady_clock::now() - start_time).count(); + TRACE(SwitchCoreOpTrace, device_id, core_op_handle, elapsed_time_ms); + + return HAILO_SUCCESS; +} + + +hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op) +{ + static const uint16_t DEACTIVATE_BATCH_SIZE = 0; + const std::shared_ptr<VdmaConfigCoreOp> DEACTIVATE_NEXT_CORE_OP = nullptr; + return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE); +} + +hailo_status VdmaConfigManager::set_state_machine(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size) +{ + // TODO: HRT-13253 - don't use the resources manager; instead call m_vdma_device directly. The device should store + // the current active core op. + if (next != nullptr) { + CHECK_SUCCESS(next->get_resources_manager()->enable_state_machine(batch_size), "Failed to enable state machine"); + // When switching network groups, the FW switch to the next NG does not mark the current NG as deactivated,
+ // so we explicitly mark it as deactivated here. + if ((current != nullptr) && (current != next)) { + current->get_resources_manager()->set_is_activated(false); + } + } else { + assert(current != nullptr); + CHECK_SUCCESS(current->get_resources_manager()->reset_state_machine(), "Failed to disable state machine"); + } + return HAILO_SUCCESS; +} + +hailo_status VdmaConfigManager::switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, const uint16_t batch_size) +{ + assert((nullptr != current) || (nullptr != next)); + + if (current != nullptr) { + CHECK_SUCCESS(current->deactivate_host_resources(), "Failed deactivating host resources for current core-op"); // TODO: In mercury we need to reset after deactivate. This will be fixed in MSW-762 and the "if" will be removed // when we make the nn_manager responsible to reset the nn-core. - if (Device::Type::INTEGRATED == current_active_core_op->get_resources_manager()->get_device().get_type()) { - status = current_active_core_op->get_resources_manager()->reset_state_machine(); - CHECK_SUCCESS(status, "Failed to reset state machine in switch core-op"); + if (Device::Type::INTEGRATED == current->get_resources_manager()->get_device().get_type()) { + CHECK_SUCCESS(current->get_resources_manager()->reset_state_machine(), "Failed to reset state machine in switch core-op"); } + } - // Switch from the current core-op to the next core-op. I.e. current core-op will be deactivated and - // next core-op will be activated - status = next_core_op->activate_impl(batch_size); - CHECK_SUCCESS(status, "Failed activating next core-op"); + CHECK_SUCCESS(set_state_machine(current, next, batch_size), "Failed to set state machine"); + + // Activate next core op resources + if (next != nullptr) { + CHECK_SUCCESS(next->activate_host_resources(), "Failed activating host resources for next core-op"); + } - // Current core-op is now deactivated (we are not on batch switch), so we can cancel pending transfers.
- status = current_active_core_op->cancel_pending_transfers(); - CHECK_SUCCESS(status, "Failed canceling pending transfers from previous core-op"); + if (current != nullptr) { + CHECK_SUCCESS(current->cancel_pending_transfers(), "Failed canceling pending transfers from previous core-op"); } return HAILO_SUCCESS; } -hailo_status VdmaConfigManager::deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op) +hailo_status VdmaConfigManager::fast_batch_switch(std::shared_ptr<VdmaConfigCoreOp> current, const uint16_t batch_size) { - static const uint16_t DEACTIVATE_BATCH_SIZE = 0; - const std::shared_ptr<VdmaConfigCoreOp> DEACTIVATE_NEXT_CORE_OP = nullptr; - static const bool IS_NOT_BATCH_SWITCH = false; - return switch_core_op(current_active_core_op, DEACTIVATE_NEXT_CORE_OP, DEACTIVATE_BATCH_SIZE, IS_NOT_BATCH_SWITCH); + assert(nullptr != current); + return set_state_machine(current, current, batch_size); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.hpp b/hailort/libhailort/src/vdma/vdma_config_manager.hpp index fc13c368..6045f6ef 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.hpp @@ -26,10 +26,17 @@ class VdmaConfigManager final public: VdmaConfigManager() = delete; - static hailo_status switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op, - std::shared_ptr<VdmaConfigCoreOp> next_core_op, const uint16_t batch_size, const bool is_batch_switch); - + static hailo_status set_core_op(const std::string &device_id, std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); static hailo_status deactivate_core_op(std::shared_ptr<VdmaConfigCoreOp> current_active_core_op); + +private: + static hailo_status set_state_machine(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); + + static hailo_status switch_core_op(std::shared_ptr<VdmaConfigCoreOp> current, + std::shared_ptr<VdmaConfigCoreOp> next, uint16_t batch_size); + static hailo_status fast_batch_switch(std::shared_ptr<VdmaConfigCoreOp> current, uint16_t batch_size); }; } /* namespace hailort */
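For readers following the refactor, the ordering that set_core_op/switch_core_op now enforce can be summarized as a standalone toy (not HailoRT code; the printfs stand in for the real calls): tear down the current op's host resources, flip the FW state machine, bring up the next op's host resources, then cancel whatever the old op still had in flight.

    #include <cstdio>

    struct ToyCoreOp { const char *name; };

    static void switch_core_op(ToyCoreOp *current, ToyCoreOp *next) {
        if (current) printf("deactivate host resources: %s\n", current->name);
        if (next)    printf("enable state machine for:  %s\n", next->name);
        else         printf("reset state machine\n");
        if (next)    printf("activate host resources:   %s\n", next->name);
        if (current) printf("cancel pending transfers:  %s\n", current->name);
    }

    int main() {
        ToyCoreOp a{"op_a"}, b{"op_b"};
        switch_core_op(&a, &b);      // switch between two core-ops
        switch_core_op(&b, nullptr); // deactivate the last one
        return 0;
    }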
diff --git a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp index c06c3b07..026a3e9a 100644 --- a/hailort/libhailort/src/vdma/vdma_device.cpp +++ b/hailort/libhailort/src/vdma/vdma_device.cpp @@ -11,13 +11,16 @@ #include "vdma/vdma_device.hpp" #include "vdma/memory/descriptor_list.hpp" -#include "vdma/memory/mapping_manager.hpp" #include "vdma/vdma_config_manager.hpp" #include "vdma/pcie/pcie_device.hpp" #include "vdma/integrated/integrated_device.hpp" #include "device_common/control.hpp" +#include "device_common/device_internal.hpp" #include "core_op/resource_manager/resource_manager_builder.hpp" #include "core_op/core_op.hpp" +#include "common/os_utils.hpp" +#include "utils/buffer_storage.hpp" +#include "hef/hef_internal.hpp" #include #include @@ -35,7 +38,6 @@ static constexpr std::chrono::milliseconds DEFAULT_TIMEOUT(50000); VdmaDevice::VdmaDevice(std::unique_ptr<HailoRTDriver> &&driver, Device::Type type) : DeviceBase::DeviceBase(type), m_driver(std::move(driver)), - m_mapping_manager(*m_driver), m_is_configured(false) { activate_notifications(get_dev_id()); @@ -144,9 +146,10 @@ Expected<ConfiguredNetworkGroupVector> VdmaDevice::add_hef(Hef &hef, const Netwo CHECK_SUCCESS_AS_EXPECTED(status); assert(nullptr == m_vdma_interrupts_dispatcher); - auto interrupts_dispatcher = vdma::InterruptsDispatcher::create(std::ref(*m_driver)); - CHECK_EXPECTED(interrupts_dispatcher); - m_vdma_interrupts_dispatcher = interrupts_dispatcher.release(); + TRY(m_vdma_interrupts_dispatcher, vdma::InterruptsDispatcher::create(std::ref(*m_driver))); + + assert(nullptr == m_vdma_transfer_launcher); + TRY(m_vdma_transfer_launcher, vdma::TransferLauncher::create()); m_is_configured = true; } @@ -173,7 +176,8 @@ Expected<std::shared_ptr<ConfiguredNetworkGroup>> VdmaDevice::create_configured_ /* build HEF supported features */ auto resource_manager = ResourcesManagerBuilder::build(current_core_op_index, *this, get_driver(), config_params, core_op_metadata, static_cast<HEFHwArch>(hef.pimpl->get_device_arch()), + hef.pimpl->get_shef_file_handle()); CHECK_EXPECTED(resource_manager); @@ -194,7 +198,6 @@ Expected<std::shared_ptr<ConfiguredNetworkGroup>> VdmaDevice::create_configured_ core_ops.emplace_back(core_op_ptr); m_core_ops.emplace_back(core_op_ptr); - // TODO: HRT-8875 auto metadata = hef.pimpl->network_group_metadata(core_op_metadata->core_op_name()); auto network_group_expected = ConfiguredNetworkGroupBase::create(config_params, std::move(core_ops), std::move(metadata)); CHECK_EXPECTED(network_group_expected); @@ -225,6 +228,17 @@ hailo_reset_device_mode_t VdmaDevice::get_default_reset_mode() return HAILO_RESET_DEVICE_MODE_SOFT; } +// TODO - HRT-13234, move to DeviceBase +void VdmaDevice::shutdown_core_ops() +{ + for (auto core_op : m_core_ops) { + auto status = core_op->shutdown(); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to shutdown core op with status {}", status); + } + } +} + hailo_status VdmaDevice::mark_as_used() { return m_driver->mark_as_used(); @@ -236,6 +250,12 @@ ExpectedRef<vdma::InterruptsDispatcher> VdmaDevice::get_vdma_interrupts_dispatch return std::ref(*m_vdma_interrupts_dispatcher); } +ExpectedRef<vdma::TransferLauncher> VdmaDevice::get_vdma_transfer_launcher() +{ + CHECK_AS_EXPECTED(m_vdma_transfer_launcher, HAILO_INTERNAL_FAILURE, "vDMA transfer launcher wasn't created"); + return std::ref(*m_vdma_transfer_launcher); +} + VdmaDevice::~VdmaDevice() { auto status = stop_notification_fetch_thread(); @@ -250,20 +270,50 @@ VdmaDevice::~VdmaDevice() } } -hailo_status VdmaDevice::dma_map(void *address, size_t size, hailo_stream_direction_t direction) +static std::pair<void *, size_t> aligned_part_to_map(void *original, size_t size) { - return m_mapping_manager.map_buffer(address, size, direction); + const auto dma_alignment = OsUtils::get_dma_able_alignment(); + const auto aligned_address = HailoRTCommon::align_to(original, dma_alignment); + const auto unaligned_part = reinterpret_cast<uintptr_t>(aligned_address) - reinterpret_cast<uintptr_t>(original); + const auto aligned_size = size > unaligned_part ? size - unaligned_part : 0; + return std::make_pair(aligned_address, aligned_size); }
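A worked example of what aligned_part_to_map computes, as a standalone check (not HailoRT code; a 4 KB DMA alignment is assumed for the illustration):

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uintptr_t alignment = 0x1000;  // assumed DMA alignment
        const uintptr_t original = 0x10403;  // unaligned user address
        const size_t size = 0x3000;          // 12 KB buffer
        // Round the address up to the next alignment boundary.
        const uintptr_t aligned = (original + alignment - 1) & ~(alignment - 1); // 0x11000
        const size_t unaligned_part = aligned - original;                        // 0xBFD bytes skipped
        const size_t aligned_size = size > unaligned_part ? size - unaligned_part : 0;
        assert(unaligned_part == 0xBFD && aligned_size == 0x2403);
        // The skipped head is handled via an already-mapped bounce buffer (see dma_map below).
        return 0;
    }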
-hailo_status VdmaDevice::dma_unmap(void *address, hailo_stream_direction_t direction) +hailo_status VdmaDevice::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { - return m_mapping_manager.unmap_buffer(address, direction); + // Since we can't map unaligned addresses (to dma alignment), we map only the aligned part of the buffer. The other + // unaligned part will be copied into some bounce buffer (which is already mapped). + std::tie(address, size) = aligned_part_to_map(address, size); + + if (size == 0) { + // The aligned part is not in range (Can happen when the buffer is smaller than the dma alignment), nothing to + // map. + return HAILO_SUCCESS; + } + + // Find buffer_identifier if registered to BufferStorageResourceManager. + auto buffer_identifier = HailoRTDriver::INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER; + if (auto storage = BufferStorageResourceManager::get_resource(std::make_pair(address, size))) { + TRY(const auto buffer, storage->get()->get_dma_able_buffer()); + buffer_identifier = buffer->buffer_identifier(); + } + + CHECK_EXPECTED(m_driver->vdma_buffer_map(address, size, to_hailo_driver_direction(data_direction), buffer_identifier)); + return HAILO_SUCCESS; } -Expected<std::pair<vdma::MappedBufferPtr, bool>> VdmaDevice::try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) +hailo_status VdmaDevice::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { - return m_mapping_manager.try_dma_map(buffer, direction); + // Since we can't map unaligned addresses (to dma alignment), we map only the aligned part of the buffer. The other + // unaligned part will be copied into some bounce buffer (which is already mapped). + std::tie(address, size) = aligned_part_to_map(address, size); + if (size == 0) { + // The aligned part is not in range (Can happen when the buffer is smaller than the dma alignment), nothing to + // map. + return HAILO_SUCCESS; + } + + return m_driver->vdma_buffer_unmap(address, size, to_hailo_driver_direction(data_direction)); } Expected<ConfiguredNetworkGroupVector> VdmaDevice::create_networks_group_vector(Hef &hef, const NetworkGroupsParamsMap &configure_params) diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp index 30dc64ee..105e6d84 100644 --- a/hailort/libhailort/src/vdma/vdma_device.hpp +++ b/hailort/libhailort/src/vdma/vdma_device.hpp @@ -17,8 +17,8 @@ #include "device_common/device_internal.hpp" #include "network_group/network_group_internal.hpp" #include "vdma/channel/interrupts_dispatcher.hpp" -#include "vdma/memory/mapping_manager.hpp" -#include "os/hailort_driver.hpp" +#include "vdma/channel/transfer_launcher.hpp" +#include "vdma/driver/hailort_driver.hpp" namespace hailort @@ -32,6 +32,7 @@ class VdmaDevice : public DeviceBase { virtual hailo_status wait_for_wakeup() override; virtual void increment_control_sequence() override; + virtual void shutdown_core_ops() override; virtual hailo_reset_device_mode_t get_default_reset_mode() override; hailo_status mark_as_used(); virtual Expected<size_t> read_log(MemoryView &buffer, hailo_cpu_id_t cpu_id) override; @@ -48,11 +49,10 @@ class VdmaDevice : public DeviceBase { }; ExpectedRef<vdma::InterruptsDispatcher> get_vdma_interrupts_dispatcher(); + ExpectedRef<vdma::TransferLauncher> get_vdma_transfer_launcher(); - virtual hailo_status dma_map(void *address, size_t size, hailo_stream_direction_t direction) override; - virtual hailo_status dma_unmap(void *address, hailo_stream_direction_t direction) override; - virtual Expected<std::pair<vdma::MappedBufferPtr, bool>> try_dma_map(vdma::DmaAbleBufferPtr buffer, - hailo_stream_direction_t direction) override; + virtual hailo_status dma_map(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; protected: VdmaDevice(std::unique_ptr<HailoRTDriver> &&driver, Type type); @@ -63,16 +63,15 @@ class VdmaDevice : public DeviceBase { uint8_t *response_buffer, size_t *response_size, hailo_cpu_id_t cpu_id) override; virtual Expected<ConfiguredNetworkGroupVector> add_hef(Hef &hef, const NetworkGroupsParamsMap &configure_params) override; - // Initialization dependency: MappingManager holds dma mappings for all buffers relative to this device!
- // (CoreOp for example holds streams with mapped buffers) std::unique_ptr<HailoRTDriver> m_driver; - vdma::MappingManager m_mapping_manager; + // TODO - HRT-13234, move to DeviceBase std::vector<std::shared_ptr<CoreOp>> m_core_ops; std::vector<std::shared_ptr<ConfiguredNetworkGroupBase>> m_network_groups; // TODO: HRT-9547 - Remove when ConfiguredNetworkGroup will be kept in global context // The vdma interrupts dispatcher contains a callback with a reference to the current activated network group - // (reference to the ResourcesManager). Hence, it must be destructed before the networks groups are destructed. + // (reference to the ResourcesManager). Hence, it must be destroyed before the networks groups are destroyed. std::unique_ptr<vdma::InterruptsDispatcher> m_vdma_interrupts_dispatcher; + std::unique_ptr<vdma::TransferLauncher> m_vdma_transfer_launcher; ActiveCoreOpHolder m_active_core_op_holder; bool m_is_configured; diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp index 6379ead3..8c324ad8 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream.cpp @@ -11,6 +11,7 @@ #include "vdma/vdma_stream.hpp" #include "vdma/circular_stream_buffer_pool.hpp" #include "utils/profiler/tracer_macros.hpp" +#include "utils/buffer_storage.hpp" #include "common/os_utils.hpp" @@ -24,44 +25,53 @@ Expected<std::shared_ptr<VdmaInputStream>> VdmaInputStream::create(hailo_stream_ { assert((interface == HAILO_STREAM_INTERFACE_PCIE) || (interface == HAILO_STREAM_INTERFACE_INTEGRATED)); + TRY(auto bounce_buffers_pool, init_dma_bounce_buffer_pool(device, channel, edge_layer)); + hailo_status status = HAILO_UNINITIALIZED; auto result = make_shared_nothrow<VdmaInputStream>(device, channel, edge_layer, - core_op_activated_event, interface, status); + core_op_activated_event, interface, std::move(bounce_buffers_pool), status); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); return result; } -std::unique_ptr<StreamBufferPool> VdmaInputStream::init_dma_bounce_buffer_pool( - vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, hailo_status &status) +Expected<BounceBufferQueuePtr> VdmaInputStream::init_dma_bounce_buffer_pool( + VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer) { const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); const auto dma_bounce_buffer_pool_size = channel->get_max_ongoing_transfers( LayerInfoUtils::get_layer_transfer_size(edge_layer)); - // Checking status for base class c'tor - if (HAILO_SUCCESS != status) { - return nullptr; - } + const auto bounce_buffer_size = std::min( + static_cast<uint32_t>(dma_able_alignment), LayerInfoUtils::get_layer_transfer_size(edge_layer)); - // Initialize dma buffer pool for support for non-aligned user buffers - auto dma_queued_pool = QueuedStreamBufferPool::create(dma_bounce_buffer_pool_size, dma_able_alignment, - BufferStorageParams::create_dma()); - if (dma_queued_pool.status() != HAILO_SUCCESS) { - LOGGER__ERROR("Failed creating DMA bounce buffer pool with status {}", dma_queued_pool.status()); - status = dma_queued_pool.status(); - return nullptr; - } + auto bounce_buffers_pool = make_unique_nothrow<BounceBufferQueue>(dma_bounce_buffer_pool_size); + CHECK_NOT_NULL(bounce_buffers_pool, HAILO_OUT_OF_HOST_MEMORY); + + for (size_t i = 0; i < dma_bounce_buffer_pool_size; i++) { + TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_by_allocation(bounce_buffer_size, device.get_driver())); + + auto dma_storage = make_shared_nothrow<DmaStorage>(std::move(dma_able_buffer)); + CHECK_NOT_NULL(dma_storage, HAILO_OUT_OF_HOST_MEMORY); - return std::unique_ptr<StreamBufferPool>(dma_queued_pool.release()); + TRY(auto buffer, Buffer::create(std::move(dma_storage))); + TRY(auto mapping, DmaMappedBuffer::create(device, buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); + + auto bounce_buffer = make_shared_nothrow<BounceBuffer>(BounceBuffer{std::move(buffer), std::move(mapping)}); + CHECK_NOT_NULL(bounce_buffer, HAILO_OUT_OF_HOST_MEMORY); + + CHECK_SUCCESS(bounce_buffers_pool->enqueue(std::move(bounce_buffer))); + } + return bounce_buffers_pool; }
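The sizing logic above allocates one bounce buffer per possible in-flight transfer, each only as large as the unaligned head it may need to absorb (capped by the frame size). A standalone check of that arithmetic (not HailoRT code; the alignment, frame size, and pool depth are illustrative):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    int main() {
        const uint32_t dma_able_alignment = 4096; // assumed platform DMA alignment
        const uint32_t frame_size = 1500;         // example transfer size
        const size_t max_ongoing_transfers = 16;  // pool depth == max transfers in flight
        // A bounce buffer never needs to hold more than one alignment's worth of
        // head bytes, nor more than a whole frame.
        const uint32_t bounce_buffer_size = std::min(dma_able_alignment, frame_size); // 1500
        assert(bounce_buffer_size == 1500);
        assert(max_ongoing_transfers * bounce_buffer_size == 24000); // total pool bytes
        return 0;
    }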
Buffer::create(std::move(dma_storage))); + TRY(auto mapping, DmaMappedBuffer::create(device, buffer.data(), buffer.size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); + + auto bounce_buffer = make_shared_nothrow(BounceBuffer{std::move(buffer), std::move(mapping)}); + CHECK_NOT_NULL(bounce_buffer, HAILO_OUT_OF_HOST_MEMORY); + + CHECK_SUCCESS(bounce_buffers_pool->enqueue(std::move(bounce_buffer))); + } + return bounce_buffers_pool; } VdmaInputStream::VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, EventPtr core_op_activated_event, - hailo_stream_interface_t stream_interface, hailo_status &status) : + hailo_stream_interface_t stream_interface, BounceBufferQueuePtr &&bounce_buffers_pool, + hailo_status &status) : AsyncInputStreamBase(edge_layer, std::move(core_op_activated_event), status), m_device(device), - m_dma_bounce_buffer_pool(init_dma_bounce_buffer_pool(channel, edge_layer, status)), + m_bounce_buffers_pool(std::move(bounce_buffers_pool)), m_channel(std::move(channel)), m_interface(stream_interface), m_core_op_handle(INVALID_CORE_OP_HANDLE) @@ -96,11 +106,16 @@ void VdmaInputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_o Expected> VdmaInputStream::allocate_buffer_pool() { - auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::H2D, - m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), get_frame_size()); - CHECK_EXPECTED(circular_pool); + TRY(auto circular_pool, CircularStreamBufferPool::create(m_device, HAILO_DMA_BUFFER_DIRECTION_H2D, + m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), get_frame_size())); + + // Bind the buffer to the channel to avoid the need to do it on every transfer. 
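A note on the pool construction in init_dma_bounce_buffer_pool above: every slot is allocated, wrapped, and DMA-mapped exactly once, and afterwards only recycled through the queue (dequeued when an unaligned write is launched, re-enqueued from its completion callback), so no allocation or mapping work happens per transfer. A minimal, generic sketch of that recycle pattern, using a mutex-guarded std::queue as a stand-in for HailoRT's SafeQueue and a hypothetical BounceSlot in place of BounceBuffer:

```cpp
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>
#include <queue>
#include <vector>

// Hypothetical stand-in for one pre-allocated, pre-mapped bounce buffer.
struct BounceSlot {
    std::vector<uint8_t> data; // would be DMA-able, mapped memory in the real pool
};

class BouncePool {
public:
    BouncePool(size_t slots, size_t slot_size)
    {
        // All allocation (and, in the real code, DMA mapping) happens here, once.
        for (size_t i = 0; i < slots; i++) {
            auto slot = std::make_shared<BounceSlot>();
            slot->data.resize(slot_size);
            m_queue.push(std::move(slot));
        }
    }

    // Taken when an unaligned transfer is launched.
    std::shared_ptr<BounceSlot> dequeue()
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        if (m_queue.empty()) {
            return nullptr; // the real SafeQueue reports a status instead
        }
        auto slot = std::move(m_queue.front());
        m_queue.pop();
        return slot;
    }

    // Returned from the transfer-completion callback.
    void enqueue(std::shared_ptr<BounceSlot> slot)
    {
        std::lock_guard<std::mutex> lock(m_mutex);
        m_queue.push(std::move(slot));
    }

private:
    std::mutex m_mutex;
    std::queue<std::shared_ptr<BounceSlot>> m_queue;
};
```

Sizing the pool to the channel's maximum ongoing transfers (as the real code does via get_max_ongoing_transfers()) means a free slot is available whenever another transfer may still be launched.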
+ TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer()); + TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(), + HailoRTDriver::DmaDirection::H2D)); + CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer)); - return std::unique_ptr(circular_pool.release()); + return std::unique_ptr(std::move(circular_pool)); } size_t VdmaInputStream::get_max_ongoing_transfers() const @@ -112,17 +127,10 @@ Expected VdmaInputStream::align_transfer_request(TransferReques { const auto dma_alignment = OsUtils::get_dma_able_alignment(); std::vector transfer_buffers; - TransferBuffer dma_able_bounce_buffer; - const auto buffer_address = transfer_request.transfer_buffers[0].base_buffer()->data(); + const auto buffer_address = transfer_request.transfer_buffers[0].base_buffer().data(); const auto buffer_size = transfer_request.transfer_buffers[0].size(); - { - std::unique_lock lock(m_dma_pool_mutex); - // Initialize dma able bounce buffer the size of alignment size to read pre alignment data - auto dma_able_bounce_buffer_exp = m_dma_bounce_buffer_pool->dequeue(); - CHECK_EXPECTED(dma_able_bounce_buffer_exp); - dma_able_bounce_buffer = dma_able_bounce_buffer_exp.release(); - } + TRY(const auto dma_able_bounce_buffer, m_bounce_buffers_pool->dequeue()); // If buffer size is larger than alignment size - will create bounce buffer for non aligned buffer part and then use // User's buffer from aligned address - otherwise will create bounce buffer size of user buffer and copy whole frame @@ -135,25 +143,20 @@ Expected VdmaInputStream::align_transfer_request(TransferReques const auto user_buffer_size = buffer_size - bounce_buffer_exact_size; // Create another transfer buffer with same base address but exact size for actual transfer - auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), bounce_buffer_exact_size, 0); - memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, bounce_buffer_exact_size); + auto dma_able_exact_bounce_buffer = TransferBuffer(MemoryView(dma_able_bounce_buffer->buffer_storage), + bounce_buffer_exact_size, 0); + dma_able_exact_bounce_buffer.copy_from(MemoryView(buffer_address, bounce_buffer_exact_size)); transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); - - auto dma_able_user_buffer = DmaStorage::create_dma_able_buffer_from_user_size( - reinterpret_cast(aligned_user_buffer_addr), user_buffer_size); - CHECK_EXPECTED(dma_able_user_buffer); - transfer_buffers.emplace_back(dma_able_user_buffer.release()); + transfer_buffers.emplace_back(MemoryView(reinterpret_cast(aligned_user_buffer_addr), user_buffer_size)); } else { - auto dma_able_exact_bounce_buffer = TransferBuffer(dma_able_bounce_buffer.base_buffer(), buffer_size, 0); - memcpy((dma_able_exact_bounce_buffer.base_buffer())->data(), buffer_address, buffer_size); + auto dma_able_exact_bounce_buffer = TransferBuffer(MemoryView(dma_able_bounce_buffer->buffer_storage), buffer_size, 0); + dma_able_exact_bounce_buffer.copy_from(MemoryView(buffer_address, buffer_size)); transfer_buffers.emplace_back(dma_able_exact_bounce_buffer); } - auto wrapped_callback = [user_callback=transfer_request.callback, dma_able_bounce_buffer, this](hailo_status callback_status) { - { - std::unique_lock lock(m_dma_pool_mutex); - m_dma_bounce_buffer_pool->enqueue(TransferBuffer{dma_able_bounce_buffer}); - } + auto wrapped_callback = [user_callback=transfer_request.callback, + 
dma_able_bounce_buffer=std::move(dma_able_bounce_buffer), this](hailo_status callback_status) mutable {
+        m_bounce_buffers_pool->enqueue(std::move(dma_able_bounce_buffer));
         user_callback(callback_status);
     };

@@ -163,18 +166,15 @@ Expected VdmaInputStream::align_transfer_request(TransferReques
 hailo_status VdmaInputStream::write_async_impl(TransferRequest &&transfer_request)
 {
     TRACE(FrameDequeueH2DTrace, m_device.get_dev_id(), m_core_op_handle, name());
-    const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING);
     const auto dma_able_alignment = OsUtils::get_dma_able_alignment();
-    if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) {
-        return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer);
+    if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()) % dma_able_alignment == 0) {
+        return m_channel->launch_transfer(std::move(transfer_request));
     } else {
         auto unaligned_transfer_request = align_transfer_request(std::move(transfer_request));
         CHECK_EXPECTED_AS_STATUS(unaligned_transfer_request);
-        return m_channel->launch_transfer(unaligned_transfer_request.release(), user_owns_buffer);
+        return m_channel->launch_transfer(unaligned_transfer_request.release());
     }
-
-    return HAILO_INTERNAL_FAILURE;
 }

 hailo_status VdmaInputStream::activate_stream_impl()
@@ -246,11 +246,16 @@ hailo_stream_interface_t VdmaOutputStream::get_interface() const
 Expected> VdmaOutputStream::allocate_buffer_pool()
 {
-    auto circular_pool = CircularStreamBufferPool::create(m_device, HailoRTDriver::DmaDirection::D2H,
-        m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), m_transfer_size);
-    CHECK_EXPECTED(circular_pool);
+    TRY(auto circular_pool, CircularStreamBufferPool::create(m_device, HAILO_DMA_BUFFER_DIRECTION_D2H,
+        m_channel->get_desc_list()->desc_page_size(), m_channel->get_desc_list()->count(), m_transfer_size));
+
+    // Bind the buffer to the channel to avoid the need to do it on every transfer.
+    TRY(auto pool_dma_able_buffer, circular_pool->get_base_buffer().storage().get_dma_able_buffer());
+    TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(pool_dma_able_buffer, m_device.get_driver(),
+        HailoRTDriver::DmaDirection::D2H));
+    CHECK_SUCCESS(m_channel->bind_buffer(mapped_buffer));

-    return std::unique_ptr(circular_pool.release());
+    return std::unique_ptr(std::move(circular_pool));
 }

 size_t VdmaOutputStream::get_max_ongoing_transfers() const
@@ -260,18 +265,18 @@ size_t VdmaOutputStream::get_max_ongoing_transfers() const
 Expected VdmaOutputStream::align_transfer_request(TransferRequest &&transfer_request)
 {
-    auto aligned_bounce_buffer_exp = DmaStorage::create_dma_able_buffer_from_user_size(nullptr,
-        transfer_request.transfer_buffers[0].size());
-    CHECK_EXPECTED(aligned_bounce_buffer_exp);
-    auto aligned_bounce_buffer = aligned_bounce_buffer_exp.release();
+    // Allocate a bounce buffer and store it inside the lambda to keep it alive until it is no longer needed.
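The comment above names the ownership idiom used on both the input and output paths: the completion callback captures the bounce buffer by value, so the allocation stays alive for as long as the in-flight transfer can touch it, and is released (or re-enqueued) only after the callback runs. A minimal sketch of the idiom; submit_async here is a hypothetical stand-in for the channel's launch_transfer, invoked inline rather than from the interrupts dispatcher:

```cpp
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <vector>

using Callback = std::function<void(int /*status*/)>;

// Toy "async" submit: runs the callback inline. A real launch would store it
// and invoke it from a completion handler once the DMA transfer finishes.
static void submit_async(uint8_t * /*dma_dst*/, size_t /*size*/, Callback on_done)
{
    on_done(0);
}

// D2H-style read into an unaligned user buffer via a bounce buffer.
static void read_into_unaligned(uint8_t *user_buffer, size_t size)
{
    // Heap-allocate the bounce buffer; the by-value capture below keeps it
    // alive exactly until the completion callback returns.
    auto bounce = std::make_shared<std::vector<uint8_t>>(size);

    submit_async(bounce->data(), size, [bounce, user_buffer, size](int status) {
        if (0 == status) {
            // Copy the received frame back into the (unaligned) user buffer.
            std::memcpy(user_buffer, bounce->data(), size);
        }
        // `bounce` is released here, freeing the allocation.
    });
}
```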
+ auto bounce_buffer_exp = Buffer::create_shared(transfer_request.transfer_buffers[0].size(), BufferStorageParams::create_dma()); + CHECK_EXPECTED(bounce_buffer_exp); + auto bounce_buffer = bounce_buffer_exp.release(); auto wrapped_callback = [unaligned_user_buffer = transfer_request.transfer_buffers[0].base_buffer(), - aligned_bounce_buffer, user_callback = transfer_request.callback](hailo_status callback_status) { - memcpy(const_cast(unaligned_user_buffer->data()), aligned_bounce_buffer->data(), unaligned_user_buffer->size()); + bounce_buffer=bounce_buffer, user_callback=transfer_request.callback](hailo_status callback_status) { + memcpy(const_cast(unaligned_user_buffer.data()), bounce_buffer->data(), unaligned_user_buffer.size()); user_callback(callback_status); }; - return TransferRequest(std::move(aligned_bounce_buffer), wrapped_callback); + return TransferRequest(MemoryView(bounce_buffer->data(), bounce_buffer->size()), wrapped_callback); } hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_request) @@ -285,18 +290,17 @@ hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_reques original_callback(status); }; } - const auto user_owns_buffer = (buffer_mode() == StreamBufferMode::NOT_OWNING); const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); - if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()) % dma_able_alignment == 0) { - return m_channel->launch_transfer(std::move(transfer_request), user_owns_buffer); + if (reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()) % dma_able_alignment == 0) { + return m_channel->launch_transfer(std::move(transfer_request)); } else { // In case of read unaligned - currently doesnt support using users buffer - so allocate complete new buffer size of user's buffer LOGGER__WARNING("read_async() was provided an unaligned buffer (address=0x{:x}), which causes performance degradation. 
Use buffers algined to {} bytes for optimal performance", - reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer()->data()), dma_able_alignment); + reinterpret_cast(transfer_request.transfer_buffers[0].base_buffer().data()), dma_able_alignment); auto realigned_transfer_request = align_transfer_request(std::move(transfer_request)); CHECK_EXPECTED_AS_STATUS(realigned_transfer_request); - return m_channel->launch_transfer(realigned_transfer_request.release(), user_owns_buffer); + return m_channel->launch_transfer(realigned_transfer_request.release()); } } diff --git a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp index c2203a45..b3962120 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream.hpp @@ -20,6 +20,15 @@ namespace hailort { +struct BounceBuffer { + Buffer buffer_storage; + DmaMappedBuffer mapping; +}; +using BounceBufferPtr = std::shared_ptr; + +using BounceBufferQueue = SafeQueue; +using BounceBufferQueuePtr = std::unique_ptr; + class VdmaInputStream : public AsyncInputStreamBase { public: @@ -28,7 +37,8 @@ class VdmaInputStream : public AsyncInputStreamBase { EventPtr core_op_activated_event); VdmaInputStream(VdmaDevice &device, vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer, - EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, hailo_status &status); + EventPtr core_op_activated_event, hailo_stream_interface_t stream_interface, + BounceBufferQueuePtr &&bounce_buffers_pool, hailo_status &status); virtual ~VdmaInputStream(); virtual hailo_stream_interface_t get_interface() const override; @@ -42,16 +52,12 @@ class VdmaInputStream : public AsyncInputStreamBase { virtual hailo_status activate_stream_impl() override; virtual hailo_status deactivate_stream_impl() override; - static std::unique_ptr init_dma_bounce_buffer_pool(vdma::BoundaryChannelPtr channel, - const LayerInfo &edge_layer, hailo_status &status); + static Expected init_dma_bounce_buffer_pool(VdmaDevice &device, + vdma::BoundaryChannelPtr channel, const LayerInfo &edge_layer); Expected align_transfer_request(TransferRequest &&transfer_request); VdmaDevice &m_device; - - // Buffer pool for DMA able bounce buffers - // TODO HRT-12542- create new class for bounce buffers - std::mutex m_dma_pool_mutex; - std::unique_ptr m_dma_bounce_buffer_pool; + BounceBufferQueuePtr m_bounce_buffers_pool; vdma::BoundaryChannelPtr m_channel; const hailo_stream_interface_t m_interface; diff --git a/hailort/libhailort/tracer_profiler.proto b/hailort/libhailort/tracer_profiler.proto index 5b7e37b0..d4d27c20 100644 --- a/hailort/libhailort/tracer_profiler.proto +++ b/hailort/libhailort/tracer_profiler.proto @@ -18,7 +18,8 @@ message ProtoProfilerTopHeader { string os_ver = 5; string cpu_arch = 6; uint64 sys_ram_size = 7; //bytes - uint64 time_stamp_since_epoch =8; //nanosec + uint64 time_stamp_since_epoch = 8; //nanosec + ProtoProfilerPcieInfo pcie_info = 9; } message ProtoTraceMessage { @@ -27,10 +28,12 @@ message ProtoTraceMessage { ProtoProfilerAddStreamTrace added_stream = 2; ProtoProfilerAddCoreOpTrace added_core_op = 3; ProtoProfilerAddDeviceTrace added_device = 4; - ProtoProfilerSwitchedCoreOpTrace switched_core_op = 5; + ProtoProfilerActivateCoreOpTrace activate_core_op = 5; ProtoProfilerFrameEnqueueTrace frame_enqueue = 6; ProtoProfilerFrameDequeueTrace frame_dequeue = 7; ProtoProfilerCoreOpSwitchDecision switch_core_op_decision = 8; + ProtoProfilerDeactivateCoreOpTrace deactivate_core_op = 9; 
+ ProtoProfilerLoadedHefTrace loaded_hef = 10; } } @@ -89,11 +92,18 @@ message ProtoProfilerCoreOpSwitchDecision { bool switch_because_idle = 5; } -message ProtoProfilerSwitchedCoreOpTrace { +message ProtoProfilerActivateCoreOpTrace { uint64 time_stamp = 1; // nanosec int32 new_core_op_handle = 2; - string core_op_name = 3; - string device_id = 4; + string device_id = 3; + double duration = 4; //millisec +} + +message ProtoProfilerDeactivateCoreOpTrace { + uint64 time_stamp = 1; // nanosec + int32 core_op_handle = 2; + string device_id = 3; + double duration = 4; //millisec } // Low level streams adding @@ -121,3 +131,15 @@ message ProtoProfilerAddDeviceTrace { uint64 time_stamp = 1; // nanosec ProtoProfilerDeviceInfo device_info = 2; } + +message ProtoProfilerPcieInfo { + string lanes = 1; + string gen = 2; +} + +message ProtoProfilerLoadedHefTrace { + uint64 time_stamp = 1; // nanosec + string hef_name = 2; + string dfc_version = 3; + bytes hef_md5 = 4; +} diff --git a/hailort/prepare_externals/CMakeLists.txt b/hailort/prepare_externals/CMakeLists.txt index d73f9bd5..6166c4ce 100644 --- a/hailort/prepare_externals/CMakeLists.txt +++ b/hailort/prepare_externals/CMakeLists.txt @@ -11,6 +11,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/json.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/dotwriter.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/benchmark.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/readerwriterqueue.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/eigen.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/cli11.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/protobuf.cmake) if(HAILO_BUILD_SERVICE) diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto index b0c9dab8..a95d88ca 100644 --- a/hailort/rpc/hailort_rpc.proto +++ b/hailort/rpc/hailort_rpc.proto @@ -45,6 +45,7 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_set_nms_score_threshold(ConfiguredNetworkGroup_set_nms_score_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_score_threshold_Reply) {} rpc ConfiguredNetworkGroup_set_nms_iou_threshold(ConfiguredNetworkGroup_set_nms_iou_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_iou_threshold_Reply) {} rpc ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request) returns (ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply) {} + rpc ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request) returns (ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply) {} rpc InputVStreams_create (VStream_create_Request) returns (VStreams_create_Reply) {} @@ -79,6 +80,7 @@ service ProtoHailoRtRpc { rpc OutputVStream_set_nms_score_threshold (VStream_set_nms_score_threshold_Request) returns (VStream_set_nms_score_threshold_Reply) {} rpc OutputVStream_set_nms_iou_threshold (VStream_set_nms_iou_threshold_Request) returns (VStream_set_nms_iou_threshold_Reply) {} rpc OutputVStream_set_nms_max_proposals_per_class (VStream_set_nms_max_proposals_per_class_Request) returns (VStream_set_nms_max_proposals_per_class_Reply) {} + rpc OutputVStream_set_nms_max_accumulated_mask_size (VStream_set_nms_max_accumulated_mask_size_Request) returns (VStream_set_nms_max_accumulated_mask_size_Reply) {} } message empty {} @@ -117,7 +119,6 @@ message ProtoTransferRequest { string stream_name = 1; uint32 direction = 2; bytes data = 3; - uint32 size = 4; uint32 
cb_idx = 5; } @@ -301,7 +302,7 @@ message ProtoNamedNetworkParams { message ProtoNmsShape { uint32 number_of_classes = 1; uint32 max_bbox_per_class = 2; - uint32 max_mask_size = 3; + uint32 max_accumulated_mask_size = 3; } message ProtoVStreamInfo { @@ -447,7 +448,8 @@ message ProtoYoloxPostProcessConfig { message ProtoYoloV5SegPostProcessConfig { double mask_threshold = 1; - string layer_name = 2; + uint32 max_accumulated_mask_size = 2; + string layer_name = 3; } message ProtoOpMetadata { @@ -773,6 +775,16 @@ message ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply { uint32 status = 1; } +message ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request { + ProtoConfiguredNetworkGroupIdentifier identifier = 1; + string edge_name = 2; + uint32 max_accumulated_mask_size = 3; +} + +message ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply { + uint32 status = 1; +} + message ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; string vstream_name = 2; @@ -951,4 +963,13 @@ message VStream_set_nms_max_proposals_per_class_Request { message VStream_set_nms_max_proposals_per_class_Reply { uint32 status = 1; +} + +message VStream_set_nms_max_accumulated_mask_size_Request { + ProtoVStreamIdentifier identifier = 1; + uint32 max_accumulated_mask_size = 2; +} + +message VStream_set_nms_max_accumulated_mask_size_Reply { + uint32 status = 1; } \ No newline at end of file diff --git a/hailort/scripts/download_firmware_eth.cmd b/hailort/scripts/download_firmware_eth.cmd index 593b252e..45446bc2 100644 --- a/hailort/scripts/download_firmware_eth.cmd +++ b/hailort/scripts/download_firmware_eth.cmd @@ -2,7 +2,7 @@ @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.16.2 +set HRT_VERSION=4.17.0 set FW_DIR=Hailo8/%HRT_VERSION%/FW set FW=hailo8_fw.%HRT_VERSION%_eth.bin diff --git a/hailort/scripts/download_firmware_eth.sh b/hailort/scripts/download_firmware_eth.sh index c2104ead..53b776ce 100755 --- a/hailort/scripts/download_firmware_eth.sh +++ b/hailort/scripts/download_firmware_eth.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.16.2 +readonly HRT_VERSION=4.17.0 readonly FW_AWS_DIR="Hailo8/${HRT_VERSION}/FW" readonly FW="hailo8_fw.${HRT_VERSION}_eth.bin" diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd index 663ffcc3..448e47db 100644 --- a/hailort/scripts/download_hefs.cmd +++ b/hailort/scripts/download_hefs.cmd @@ -1,11 +1,11 @@ :: cmd @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.16.2 +set HRT_VERSION=4.17.0 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs -set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef) +set EXAMPLES_HEFS=(multi_network_shortcut_net.hef shortcut_net.hef shortcut_net_nv12.hef) set TUTORIALS_HEFS=(resnet_v1_18.hef shortcut_net.hef) if not exist %LOCAL_EXAMPLES_HEF_DIR% mkdir %LOCAL_EXAMPLES_HEF_DIR% diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh index fe7af3ba..6019b38a 100755 --- a/hailort/scripts/download_hefs.sh +++ b/hailort/scripts/download_hefs.sh @@ -2,12 +2,13 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.16.2 +readonly HRT_VERSION=4.17.0 readonly 
REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS" readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs" readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs" readonly EXAMPLES_HEFS=( "shortcut_net.hef" + "shortcut_net_nv12.hef" "multi_network_shortcut_net.hef" ) readonly TUTORIALS_HEFS=( diff --git a/hailort/tools/hailo15-scripts/hailo15_env_vars.sh b/hailort/tools/hailo15-scripts/hailo15_env_vars.sh deleted file mode 100644 index 2756ad19..00000000 --- a/hailort/tools/hailo15-scripts/hailo15_env_vars.sh +++ /dev/null @@ -1,8 +0,0 @@ -#! /bin/bash -set -e - -# Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -local_platform_sw_path="$script_directory"/../../../ -h15="10.0.0.1" -ssh-copy-id root@$h15 \ No newline at end of file diff --git a/hailort/tools/hailo15-scripts/load_driver.sh b/hailort/tools/hailo15-scripts/load_driver.sh deleted file mode 100755 index 83fa1012..00000000 --- a/hailort/tools/hailo15-scripts/load_driver.sh +++ /dev/null @@ -1,13 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -./install.sh comp build_integrated_nnc_driver -path="$local_platform_sw_path"/hailort/drivers/linux/integrated_nnc/hailo_integrated_nnc.ko -scp $path root@$h15:/lib/modules/5.15.32-yocto-standard/kernel/drivers/misc/hailo_integrated_nnc.ko - -ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_firmware.sh b/hailort/tools/hailo15-scripts/load_firmware.sh deleted file mode 100755 index c5686308..00000000 --- a/hailort/tools/hailo15-scripts/load_firmware.sh +++ /dev/null @@ -1,11 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -./install.sh comp build_fw --fw vpu --hw-arch hailo15 -scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin -ssh root@$h15 "modprobe -r hailo_integrated_nnc && modprobe hailo_integrated_nnc" diff --git a/hailort/tools/hailo15-scripts/load_hrt.sh b/hailort/tools/hailo15-scripts/load_hrt.sh deleted file mode 100755 index e85594b8..00000000 --- a/hailort/tools/hailo15-scripts/load_hrt.sh +++ /dev/null @@ -1,15 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -build_config=release -./build.sh -n pG -aaarch64 -b$build_config install - -scp lib/linux.aarch64.$build_config/libhailort.* root@$h15:/usr/lib/ -scp bin/linux.aarch64.$build_config/hailortcli root@$h15:/usr/bin/ -scp bin/linux.aarch64.$build_config/debalex root@$h15:/usr/bin/ -scp bin/linux.aarch64.$build_config/board_tests root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/load_pcr.sh b/hailort/tools/hailo15-scripts/load_pcr.sh deleted file mode 100755 index 4123852e..00000000 --- a/hailort/tools/hailo15-scripts/load_pcr.sh +++ /dev/null @@ -1,12 +0,0 @@ -#! 
/bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -# Compile PCR -./install.sh comp build_infra_tools --arch aarch64 --build-hailort --build-type release - -scp platform_internals/hailo_platform_internals/low_level_tools/build/linux.aarch64.release/pcr/pcr root@$h15:/usr/bin/ diff --git a/hailort/tools/hailo15-scripts/read_log.sh b/hailort/tools/hailo15-scripts/read_log.sh deleted file mode 100755 index 398e00e9..00000000 --- a/hailort/tools/hailo15-scripts/read_log.sh +++ /dev/null @@ -1,15 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -cd $local_platform_sw_path -source hailo_platform_venv/bin/activate -ssh root@$h15 "hailortcli fw-logger /tmp/fw_log.dat" -scp root@$h15:/tmp/fw_log.dat /tmp -ssh root@$h15 "rm /tmp/fw_log.dat" - -python ./platform_internals/hailo_platform_internals/tools/firmware/tracelog_parser_tool/tracelog_parser_tool/parse_tracelog.py --fw vpu --core-log-entries firmware/vpu_firmware/build/hailo15_nnc_fw_*_log_entries.csv --core-only --raw-input-file /tmp/fw_log.dat - diff --git a/hailort/tools/hailo15-scripts/sanity_infer.sh b/hailort/tools/hailo15-scripts/sanity_infer.sh deleted file mode 100755 index 03935493..00000000 --- a/hailort/tools/hailo15-scripts/sanity_infer.sh +++ /dev/null @@ -1,8 +0,0 @@ -#! /bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -ssh root@$h15 "hailortcli run /etc/hailo/hefs/hailo15/shortcut_net/28_28_3/shortcut_net.hef -c 1" diff --git a/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh b/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh deleted file mode 100755 index 4e8c93d6..00000000 --- a/hailort/tools/hailo15-scripts/update_hrt_and_infer.sh +++ /dev/null @@ -1,23 +0,0 @@ -#! 
/bin/bash -set -e - -# Include Environment declarations -script_directory=$(cd $(dirname "${BASH_SOURCE[0]}") && pwd) -source "$script_directory"/hailo15_env_vars.sh - -# Build hailo15 artifacts -/bin/bash "$script_directory"/load_hrt.sh - -# Build hailo15 PCR -/bin/bash "$script_directory"/load_pcr.sh - -# Build hailo15 fw -cd $local_platform_sw_path -./install.sh comp build_fw --fw vpu --hw-arch hailo15 -scp firmware/vpu_firmware/build/hailo15_nnc_fw.bin root@$h15:/lib/firmware/hailo/hailo15_nnc_fw.bin - -# Build integrated_nnc (hailo15) driver -/bin/bash "$script_directory"/load_driver.sh - -# Run sanity infer -/bin/bash "$script_directory"/sanity_infer.sh diff --git a/hailort/tools/hw_debug/CMakeLists.txt b/hailort/tools/hw_debug/CMakeLists.txt deleted file mode 100644 index 5cdfa811..00000000 --- a/hailort/tools/hw_debug/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -cmake_minimum_required(VERSION 3.0.0) - -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/spdlog.cmake) -include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/cli11.cmake) - -set(FILES - main.cpp - shell.cpp - readline_wrapper.cpp - driver_memory.cpp - memory_commands.cpp - hailo15_fields.cpp - - # Depends on hailort_driver and its dependencies - ${HAILO_OS_DIR}/hailort_driver.cpp - ${HAILO_OS_DIR}/file_descriptor.cpp - ${HAILO_FULL_OS_DIR}/driver_scan.cpp -) - -if(WIN32) - # hailort_driver.cpp in windows depends on string_conversion - set(FILES ${FILES} - ${HAILORT_COMMON_OS_DIR}/string_conversion.cpp) -endif() - -add_executable(debalex ${FILES}) -target_compile_options(debalex PRIVATE ${HAILORT_COMPILE_OPTIONS}) -set_property(TARGET debalex PROPERTY CXX_STANDARD 14) -target_link_libraries(debalex PRIVATE - libhailort - spdlog::spdlog - CLI11::CLI11 - ) -target_include_directories(debalex - PRIVATE - ${HAILORT_COMMON_DIR} - ${HAILORT_SRC_DIR} - ${DRIVER_INC_DIR} -) - -if(CMAKE_SYSTEM_NAME STREQUAL QNX) - target_link_libraries(debalex PRIVATE pci) -endif() - -find_path(READLINE_INCLUDE_DIR NAMES readline/readline.h) -find_library(READLINE_LIBRARY NAMES readline) - -if(READLINE_INCLUDE_DIR AND READLINE_LIBRARY) - target_link_libraries(debalex PRIVATE ${READLINE_LIBRARY}) - target_include_directories(debalex PRIVATE ${READLINE_INCLUDE_DIR}) - add_definitions(-DUSE_READLINE) -else() - message(WARNING "Could not find readline library. To better UI, please install it by calling `sudo apt install libreadline6-dev`") -endif() - -install(TARGETS debalex - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} -) -cli11_install_completion_file(debalex) \ No newline at end of file diff --git a/hailort/tools/hw_debug/driver_memory.cpp b/hailort/tools/hw_debug/driver_memory.cpp deleted file mode 100644 index 83d34396..00000000 --- a/hailort/tools/hw_debug/driver_memory.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/** - * @file driver_memory.cpp - * @brief Implements MemorySource over HailoRT driver, reads/write all interfaces. 
- */ - -#include "driver_memory.hpp" -#include "hailo15_fields.hpp" - -DriverMemorySource::DriverMemorySource(std::shared_ptr driver, HailoRTDriver::MemoryType memory_type) : - m_driver(driver), - m_memory_type(memory_type) -{} - -hailo_status DriverMemorySource::read(uint64_t offset, uint8_t *data, size_t size) -{ - return m_driver->read_memory(m_memory_type, offset, data, size); -} - -hailo_status DriverMemorySource::write(uint64_t offset, const uint8_t *data, size_t size) -{ - return m_driver->write_memory(m_memory_type, offset, data, size); -} - -size_t DriverMemorySource::total_size() const -{ - // TODO HRT-7984: return the actual size - return std::numeric_limits::max(); -} - - -static constexpr size_t VDMA_CHANNELS_COUNT = 32; -static constexpr size_t VDMA_H2D_CHANNELS_COUNT = 16; - -#pragma pack(push, 1) -struct VdmaDataPerDirection { - // Control - uint64_t start_abort : 1; - uint64_t pause_resume : 1; - uint64_t abort_on_err : 1; - uint64_t reserved0 : 2; - uint64_t irq_on_err : 1; - uint64_t irq_on_host : 1; - uint64_t irq_on_device : 1; - - // Depth id - uint64_t id : 3; - uint64_t depth : 4; - uint64_t reserved1 : 1; - - uint64_t num_available : 16; - uint64_t num_processed : 16; - uint64_t num_ongoing : 16; - - uint64_t error : 8; - uint64_t reserved2 : 8; - uint64_t desc_address : 48; -}; -static_assert(0x10 == sizeof(VdmaDataPerDirection), "Invalid VdmaDataPerDirection size"); - -struct VdmaChannelData { - VdmaDataPerDirection src; - VdmaDataPerDirection dest; -}; -#pragma pack(pop) - -class VdmaChannelField : public Field { -public: - VdmaChannelField() : - Field("channel", "vDMA channel register") - {} - - virtual size_t elements_count() const - { - return VDMA_CHANNELS_COUNT; - }; - - virtual std::string print_element(MemorySource& memory, size_t index) const - { - assert(index < elements_count()); - VdmaChannelData data{}; - auto status = memory.read(index * sizeof(data), reinterpret_cast(&data), sizeof(data)); - if (HAILO_SUCCESS != status) { - throw std::runtime_error(fmt::format("Failed reading memory, status {}", status)); - } - - return fmt::format("channel[{}] (offset=0x{:X} size=0x{:X} type= {}):\n", index, index * sizeof(data), sizeof(data), - index < VDMA_H2D_CHANNELS_COUNT ? "H2D" : "D2H") + - fmt::format(" Src status: {}\n", print_src_status(data.src)) + - fmt::format(" Dest status: {}\n", print_dest_status(data.dest)) + - fmt::format(" Src: {}\n", print_direction(data.src)) + - fmt::format(" Dest: {}\n", print_direction(data.dest)); - } - -private: - static std::string print_src_status(const VdmaDataPerDirection &data) { - auto max_desc_mask = static_cast((1 << data.depth) - 1); - std::string status = - data.error ? "CHANNEL ERROR" : - !data.start_abort ? "ABORTED" : - data.pause_resume ? "PAUSED" : - (data.num_ongoing & max_desc_mask) != (data.num_processed & max_desc_mask) ? "DURING TRANSFER" : - (data.num_available & max_desc_mask) != (data.num_processed & max_desc_mask) ? "WAITING TO SEND" : - "IDLE"; - return status; - } - - static std::string print_dest_status(const VdmaDataPerDirection &data) { - auto max_desc_mask = static_cast((1 << data.depth) - 1); - std::string status = - data.error ? "CHANNEL ERROR" : - !data.start_abort ? "ABORTED" : - data.pause_resume ? "PAUSED" : - (data.num_ongoing & max_desc_mask) != (data.num_processed & max_desc_mask) ? "DURING TRANSFER" : - (data.num_available & max_desc_mask) != (data.num_processed & max_desc_mask) ? 
"WAITING TO RECEIVE" : - "IDLE"; - return status; - } - - static std::string print_direction(const VdmaDataPerDirection &data) - { - return fmt::format( - "control=({} | {}) id={} depth={:02} num_avail=0x{:04X} num_proc=0x{:04X} num_ongoing=0x{:04X} err=0x{:02X} desc_address=0x{:016X}", - data.start_abort ? "START" : "ABORT", - data.pause_resume ? "PAUSE" : "RESUME", - data.id, - data.depth, - data.num_available, - data.num_processed, - data.num_ongoing, - data.error, - data.desc_address << DESC_ADDRESS_SHIFT); - } - - static constexpr size_t DESC_ADDRESS_SHIFT = 16; -}; - -VdmaMemorySource::VdmaMemorySource(std::shared_ptr driver, MemoryType memory_type) : - DriverMemorySource(std::move(driver), memory_type) -{ - add_field(std::make_shared()); -} - -size_t VdmaMemorySource::total_size() const -{ - return VDMA_CHANNELS_COUNT * sizeof(VdmaChannelData); -} - -DramDmaEngineMemorySource::DramDmaEngineMemorySource(std::shared_ptr driver, MemoryType memory_type) : - DriverMemorySource(std::move(driver), memory_type) -{ - add_field(std::make_shared()); - add_field(std::make_shared()); - add_field(std::make_shared()); - add_field(std::make_shared()); -} \ No newline at end of file diff --git a/hailort/tools/hw_debug/driver_memory.hpp b/hailort/tools/hw_debug/driver_memory.hpp deleted file mode 100644 index 60e6a650..00000000 --- a/hailort/tools/hw_debug/driver_memory.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/** - * @file driver_memory.hpp - * @brief Implements MemorySource over HailoRT driver, reads/write all interfaces. - */ - -#ifndef _HW_DEBUG_DRIVER_MEMORY_HPP_ -#define _HW_DEBUG_DRIVER_MEMORY_HPP_ - -#include "memory_commands.hpp" -#include "os/hailort_driver.hpp" - -using hailort::HailoRTDriver; -using MemoryType = HailoRTDriver::MemoryType; - -class DriverMemorySource : public MemorySource { -public: - DriverMemorySource(std::shared_ptr driver, MemoryType memory_type); - - hailo_status read(uint64_t offset, uint8_t *data, size_t size) override; - hailo_status write(uint64_t offset, const uint8_t *data, size_t size) override; - size_t total_size() const override; - -private: - std::shared_ptr m_driver; - MemoryType m_memory_type; -}; - -class VdmaMemorySource : public DriverMemorySource { -public: - VdmaMemorySource(std::shared_ptr driver, MemoryType memory_type); - size_t total_size() const override; -}; - -class DramDmaEngineMemorySource : public DriverMemorySource { -public: - DramDmaEngineMemorySource(std::shared_ptr driver, MemoryType memory_type); -}; - -#endif /* _HW_DEBUG_DRIVER_MEMORY_HPP_ */ diff --git a/hailort/tools/hw_debug/hailo15_fields.cpp b/hailort/tools/hw_debug/hailo15_fields.cpp deleted file mode 100644 index d4df9dd6..00000000 --- a/hailort/tools/hw_debug/hailo15_fields.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/** - * @file hailo15_fields.cpp - * @brief Contains all memory fields related to hailo15 - */ - -#include "hailo15_fields.hpp" -#include "hw_consts/hailo15/dram_dma_engine_config_regs.h" - -// Implement our own offsetof to allow access to array -#define my_offsetof(type,field) ((size_t)(&(((type*)(0))->field))) -#define dram_dma_offsetof(field) my_offsetof(DRAM_DMA_ENGINE_CONFIG_t, field) - - -static constexpr auto CCB_ADDRESS_SHIFT = 9; - - -QddcField::QddcField() : - Field("qddc", "Queue dest device channel (qddc)") -{} - -size_t QddcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QddcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qddc[{}] enabled={} mode={} shmifo_id={}\n", 
index, - is_enabled(memory, index), mode(memory, index), shmifo_id(memory, index)); -} - -bool QddcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QddcEnable[index]))); -} - -uint32_t QddcField::shmifo_id(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QddcShmifoId[index])); -} - -std::string QddcField::mode(MemorySource &memory, size_t index) const -{ - const auto mode = memory.read(dram_dma_offsetof(QddcMode[index])); - switch (mode) { - case 0: return "CONTINUOUS"; - case 1: return "BURST"; - default: - return fmt::format("Unknown {}", mode); - } -} - -QsdcField::QsdcField() : - Field("qsdc", "Queue source device channel (qsdc)") -{} - -size_t QsdcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QsdcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qsdc[{}] enabled={} shmifo_id={}\n", index, - is_enabled(memory, index), shmifo_id(memory, index)); -} - -bool QsdcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QsdcEnable[index]))); -} - -uint32_t QsdcField::shmifo_id(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QsdcShmifoId[index])); -} - -QdmcField::QdmcField() : - Field("qdmc", "Queue dest memory channel (qdmc)") -{} - -size_t QdmcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QdmcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qdmc[{}] enabled={} address=0x{:x} desc_count={} desc_per_irq={}\n", index, - is_enabled(memory, index), base_address(memory, index), descriptors_count(memory, index), - descriptors_per_irq(memory, index)); -} - -bool QdmcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QdmcEnable[index]))); -} - -uint64_t QdmcField::base_address(MemorySource &memory, size_t index) const -{ - const uint64_t address = memory.read(dram_dma_offsetof(QdmcMemBaseAddr[index])); - return address << CCB_ADDRESS_SHIFT; -} - -uint32_t QdmcField::descriptors_count(MemorySource &memory, size_t index) const -{ - if (index > DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH) { - return memory.read(dram_dma_offsetof(QdmcMemCcbSize[index - DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH])); - } - else { - const auto desc_count_log2 = memory.read(dram_dma_offsetof(QdmcMemCcbSizeLog2[index])); - uint32_t size = 1; - for (uint32_t i = 0; i < desc_count_log2; i++) { - size <<= 1; - } - return size; - } -} - -uint32_t QdmcField::descriptors_per_irq(MemorySource &memory, size_t index) const -{ - return memory.read(dram_dma_offsetof(QdmcDescCsInterrupt[index])); -} - -QsmcField::QsmcField() : - Field("qsmc", "Queue source memory channel (qsmc)") -{} - -size_t QsmcField::elements_count() const -{ - return DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH; -} - -std::string QsmcField::print_element(MemorySource& memory, size_t index) const -{ - return fmt::format("qdmc[{}] mode={} enabled={} address=0x{:x} desc_count={}\n", index, - mode(memory, index), is_enabled(memory, index), base_address(memory, index), descriptors_count(memory, index)); -} - -bool QsmcField::is_enabled(MemorySource &memory, size_t index) const -{ - return (1 == memory.read(dram_dma_offsetof(QsmcEnable[index]))); -} - -uint64_t QsmcField::base_address(MemorySource &memory, size_t index) const -{ - const 
uint64_t address = memory.read(dram_dma_offsetof(QsmcMemBaseAddr[index])); - return address << CCB_ADDRESS_SHIFT; -} - -uint32_t QsmcField::descriptors_count(MemorySource &memory, size_t index) const -{ - const auto desc_count = memory.read(dram_dma_offsetof(QsmcMemCcbSize[index])); - return desc_count + 1; // The reg contains desc_count-1 -} - -std::string QsmcField::mode(MemorySource &memory, size_t index) const -{ - const auto mode = memory.read(dram_dma_offsetof(QsmcMode[index])); - switch (mode) { - case 0: return "CONTINUOUS"; - case 2: return "BURST"; - case 3: // C2C mode - { - auto c2c_sel = memory.read(dram_dma_offsetof(QsmcC2cSel[index])); - return fmt::format("C2C (from {})", c2c_sel); - } - default: - return fmt::format("Unknown {}", mode); - } -} diff --git a/hailort/tools/hw_debug/hailo15_fields.hpp b/hailort/tools/hw_debug/hailo15_fields.hpp deleted file mode 100644 index a159f01a..00000000 --- a/hailort/tools/hw_debug/hailo15_fields.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/** - * @file hailo15_fields.hpp - * @brief Contains all memory fields related to hailo15 - */ - -#ifndef _HW_DEBUG_HAILO15_FIELDS_H_ -#define _HW_DEBUG_HAILO15_FIELDS_H_ - -#include "memory_commands.hpp" - - -class QddcField : public Field { -public: - QddcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint32_t shmifo_id(MemorySource &memory, size_t index) const; - std::string mode(MemorySource &memory, size_t index) const; -}; - -class QsdcField : public Field { -public: - QsdcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint32_t shmifo_id(MemorySource &memory, size_t index) const; -}; - - -class QdmcField : public Field { -public: - QdmcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint64_t base_address(MemorySource &memory, size_t index) const; - uint32_t descriptors_count(MemorySource &memory, size_t index) const; - uint32_t descriptors_per_irq(MemorySource &memory, size_t index) const; -}; - -class QsmcField : public Field { -public: - QsmcField(); - - virtual size_t elements_count() const override; - virtual std::string print_element(MemorySource& memory, size_t index) const override; - -private: - bool is_enabled(MemorySource &memory, size_t index) const; - uint64_t base_address(MemorySource &memory, size_t index) const; - uint32_t descriptors_count(MemorySource &memory, size_t index) const; - std::string mode(MemorySource &memory, size_t index) const; -}; - -#endif /* _HW_DEBUG_HAILO15_FIELDS_H_ */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h deleted file mode 100644 index 889f3cca..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_macros.h +++ /dev/null @@ -1,2270 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written 
license from Hailotech. --------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_ENGINE_CONFIG_MACRO_H -#define DRAM_DMA_ENGINE_CONFIG_MACRO_H - - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCENABLE : val */ -/* Description: Enable per channel,when disabled do not give credits to vDMA */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCENABLE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCRESET : val */ -/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCRESET__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCMODE : val */ -/* Description: 0 - CONT_MODE. 
1 - BURST_MODE */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMODE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCADDBURSTVAL : val */ -/* Description: Writing to this register increment the remain burst counter in QDDC by QddcAddBurstVal x 8 Bytes: RemainBurstCount += QddcAddBurstVal. Reading this register should return the current available credit counter (RemainBurstCount) in 2s complement format - can be negative. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__WIDTH (27) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__MASK (0x07FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x07FFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL) | (((uint32_t)(value) << 0) & 0x07FFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL) | 0x07FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCADDBURSTVAL__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x07FFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCMAXDESC : val */ -/* Description: Maximum in flight descriptors,this is a TH for number of descriptors the QM might give the vDMA. 3'd0 - 1 descriptor (debug mode). 3'd1 - N_QM_DESC*1/8 (2). 3'd2 - N_QM_DESC*2/8 (4). 3'd3 - N_QM_DESC*3/8 (6). 3'd4 - N_QM_DESC*2/4 (8). 3'd5 - N_QM_DESC*5/8 (10). 3'd6 - N_QM_DESC*6/8 (12). 3'd7 - N_QM_DESC-1 (15-maximum),default. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__MASK (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000007L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCMAXDESC__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOID : val */ -/* Description: The RX-SHMIFO ID. Used to know the SHMIFO base address (from a global parameter/define) and used to select the correct SHMIFO credit signal (nn_core_inbound_buffer_ready_pulse). 0-19: for DSM-RX 0-19. 20-23: for CSM 0-3. 24-30: reserved. 31: NULL ignore any credit from NN Core. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__MASK (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__RESET (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOID__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOCREDITSIZE : val */ -/* Description: The credit size in 8B granularity minus 1. 0 - indicates 8B 1 - indicates 16B ... 10'd1023 - indicates 8kB */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__WIDTH (10) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__MASK (0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000003FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | (((uint32_t)(value) << 0) & 0x000003FFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | 0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOCREDITSIZE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDCSHMIFOINITCREDIT : val */ -/* Description: Writing to this register set the amount of credit from SHMIFO RX (AvailableCredits),used to configure the initial amount of credits,reading this register should return the value of AvailableCredits. 
Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__WIDTH (13) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__MASK (0x00001FFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00001FFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL) | (((uint32_t)(value) << 0) & 0x00001FFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL) | 0x00001FFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDCSHMIFOINITCREDIT__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00001FFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCENABLE : val */ -/* Description: Enable per channel,when disabled do not give credits to vDMA */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCENABLE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCRESET : val */ -/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCRESET__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCMAXDESC : val */ -/* Description: Maximum in flight descriptors,this is a TH for number of descriptors the QM might give the vDMA. 3'd0 - 1 descriptor (debug mode). 3'd1 - N_QM_DESC*1/8 (2). 3'd2 - N_QM_DESC*2/8 (4). 3'd3 - N_QM_DESC*3/8 (6). 
3'd4 - N_QM_DESC*4/8 (8). 3'd5 - N_QM_DESC*5/8 (10). 3'd6 - N_QM_DESC*6/8 (12). 3'd7 - N_QM_DESC-1 (15-maximum),default. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__MASK (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000007L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCMAXDESC__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000007L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCSHMIFOID : val */ -/* Description: The TX-SHMIFO ID. Used to know the SHMIFO base address (from a global parameter/define) and used to select the correct SHMIFO credit signal (nn_core_outbound_buffer_valid_pulse). 0-19: for DSM-TX 0-19. 20-30: reserved. 31: NULL ignore any credit from NN Core. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__MASK (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__RESET (0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOID__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001FL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCSHMIFOCREDITSIZE : val */ -/* Description: The credit size in 8B granularity minus 1. 0 - indicates 8B 1 - indicates 16B ... 10'd1023 - indicates 8kB */ -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__WIDTH (10) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__MASK (0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000003FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | (((uint32_t)(value) << 0) & 0x000003FFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL) | 0x000003FFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDCSHMIFOCREDITSIZE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000003FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDCFULLNUMPATTERNS : val */ -/* Description: Number of patterns per pattern ID minus one. 0 - one pattern,1 - two patterns,...,3 - four patterns. 
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLNUMPATTERNS : val */
-/* Description: Number of patterns per pattern ID minus one. 0 - one pattern,1 - two patterns,...,3 - four patterns. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLNUMPATTERNS__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNNUMLINES : val */
-/* Description: Number of lines per pattern. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMLINES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNNUMPAGES : val */
-/* Description: Number of pages per line. */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNPAGESIZE : val */
-/* Description: page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCFULLPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCFULLPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNNUMPAGES : val */
-/* Description: Number of pages per line (simplified pattern has single line/pattern). */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNPAGESIZE : val */
-/* Description: Log2(Page size/512B),valid values are 0 to PAGE_SIZE_MAX-10. 0 - 512B,1 - 1kB,2 - 2kB,3 - 4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDCSIMPPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDCSIMPPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
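The pattern-size fields above use two encodings: "minus one, in 8B granularity" for page and residue sizes, and Log2(size/512B) for the simplified-pattern page size. A sketch of the corresponding conversions, assuming byte counts that already satisfy the stated ranges:

#include <assert.h>
#include <stdint.h>

/* 8B-granularity-minus-one encoding: 8B -> 0, 16B -> 1, ..., 4kB -> 511. */
static uint32_t size_to_8b_minus1_field(uint32_t bytes)
{
    assert(bytes >= 8 && bytes <= 4096 && (bytes % 8) == 0);
    return (bytes / 8) - 1;
}

/* Log2(page/512B) encoding: 512B -> 0, 1kB -> 1, 2kB -> 2, 4kB -> 3. */
static uint32_t page_size_to_log2_field(uint32_t bytes)
{
    uint32_t field = 0;
    assert(bytes >= 512 && (bytes & (bytes - 1)) == 0);
    while ((512u << field) < bytes) {
        field++;
    }
    return field;
}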
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCENABLE : val */
-/* Description: Enable per channel,when disabled do not give credits to vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCENABLE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCRESET : val */
-/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCRESET__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMBASEADDR : val */
-/* Description: Base address to the CCB in the DDR memory space. aligned to minimum page size of 512B. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__WIDTH (26)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__MASK (0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x03FFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | (((uint32_t)(value) << 0) & 0x03FFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | 0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMBASEADDR__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMCCBSIZELOG2 : val */
-/* Description: The CCB size Log2(memory size/512B): 1 - 1kB (2 pages). 2 - 2kB. valid values are 1 to W_CCB_DESC_INDEX */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__WIDTH (5)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__MASK (0x0000001FL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0000001FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL) | (((uint32_t)(value) << 0) & 0x0000001FL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL) | 0x0000001FL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZELOG2__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000001FL))
-
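The two CCB fields above pair naturally: a 512B-aligned base plus a Log2(size/512B) size. A sketch of deriving both field values from a CCB allocation; expressing the 26-bit base in 512B units is an assumption inferred from the alignment note, not something the header states:

#include <assert.h>
#include <stdint.h>

/* Hypothetical conversion from a CCB allocation to the QDMC field values. */
static void qdmc_ccb_fields(uint64_t ccb_dma_addr, uint32_t ccb_size_bytes,
                            uint32_t *base_field, uint32_t *size_log2_field)
{
    uint32_t log2 = 0;

    assert((ccb_dma_addr % 512) == 0);           /* aligned to minimum page size */
    *base_field = (uint32_t)(ccb_dma_addr >> 9); /* assumed: base counted in 512B units */

    while ((512u << log2) < ccb_size_bytes) {    /* 1 -> 1kB (2 pages), 2 -> 2kB, ... */
        log2++;
    }
    *size_log2_field = log2;
}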
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCDESCCSINTERRUPT : val */
-/* Description: When > 0 the QDMC will interrupt the CS manager every written QdmcDescCsInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCCSINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCBANKINTERLEAVEMODE : val */
-/* Description: Select the bank interleave mode: 2'd0 - interleave 8 banks (default),2'd1 - Interleave 4 banks,2'd2 - Interleave 2 banks,2'd3 - no interleave. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCBANKINTERLEAVEMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMODE : val */
-/* Description: 0 - CONT_MODE. 1 - BURST_MODE */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCADDBURSTVAL : val */
-/* Description: Writing to this register increment the available descriptor counter in QDMC by QdmcAddBurstVal descriptors: AvailableDescsCounter += QdmcAddBurstVal. Reading this register should return the current available descriptors counter (AvailableDescsCounter). Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCADDBURSTVAL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
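Per the description above, QDMCADDBURSTVAL behaves as a write-to-add, read-current external register. A sketch of the implied burst-mode credit handoff; the MMIO pointer is a placeholder:

#include <stdint.h>

/* Hand num_descs descriptors to the channel and return the live counter:
 * the write performs AvailableDescsCounter += num_descs, the read returns it. */
static uint32_t qdmc_add_descs(volatile uint32_t *qdmc_add_burst_val_reg,
                               uint32_t num_descs)
{
    *qdmc_add_burst_val_reg = num_descs;
    return *qdmc_add_burst_val_reg;
}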
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCMEMCCBSIZE : val */
-/* Description: The CCB size Log2(memory size/512B): 1 - 1kB (2 pages). 2 - 2kB. valid values are 1 to W_CCB_DESC_INDEX */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCMEMCCBSIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCDESCPERIPHINTERRUPT : val */
-/* Description: When > 0 the QDMC will interrupt the peripheral every written QdmcDescPeriphInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCDESCPERIPHINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMCCCBPROCESSEDINDEX : val */
-/* Description: Used by the peripheral to indicates how many data is ready in the CCB (process). This is the CcbIndex (free pointer in CCB). */
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMCCCBPROCESSEDINDEX__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCENABLE : val */
-/* Description: Enable per channel,when disabled do not give credits to vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCENABLE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCRESET : val */
-/* Description: Soft reset per channel,when write 1'b1 should clear all internal credits/counter/status. Should be set when channel is disabled,usually with vDMA channel reset (abort). Write 1'b0 should do nothing. Read always return 1'b0. Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCRESET__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMODE : val */
-/* Description: QSMC mode of operation: 2'd0 - CONT_MODE 2'd1 - reserved. 2'd2 - BURST_MODE 2'd3 - C2C_MODE */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCC2CSEL : val */
-/* Description: Selector for Channel-to-Channel credit input,selects QDMC channel as source for HW available descriptors */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__WIDTH (6)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MASK (0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__RESET (0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0000003FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL) | (((uint32_t)(value) << 0) & 0x0000003FL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL) | 0x0000003FL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0000003FL))
-
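QSMCMODE's 2'd3 (C2C_MODE) works together with QSMCC2CSEL, which picks the QDMC channel that sources hardware descriptor credits. A sketch of wiring one M2D channel to a D2M peer, assuming the field macros are in scope; the register pointers and the QSMC_MODE_C2C name are placeholders:

#include <stdint.h>

#define QSMC_MODE_C2C (3u)  /* 2'd3 - C2C_MODE, per the description above */

static void qsmc_setup_c2c(volatile uint32_t *qsmc_mode_reg,
                           volatile uint32_t *qsmc_c2c_sel_reg,
                           uint32_t src_qdmc_channel)
{
    uint32_t mode = *qsmc_mode_reg;
    uint32_t sel = *qsmc_c2c_sel_reg;

    DRAM_DMA_ENGINE_CONFIG__QSMCMODE__VAL__MODIFY(mode, QSMC_MODE_C2C);
    DRAM_DMA_ENGINE_CONFIG__QSMCC2CSEL__VAL__MODIFY(sel, src_qdmc_channel);

    *qsmc_mode_reg = mode;
    *qsmc_c2c_sel_reg = sel;
}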
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCADDBURSTVAL : val */
-/* Description: Writing to this register increment the available descriptor counter in QSMC by QsmcAddBurstVal descriptors: AvailableDescsCounter += QsmcAddBurstVal. Reading this register should return the current available descriptors counter (AvailableDescsCounter). Implemented as external register type. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCADDBURSTVAL__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMEMBASEADDR : val */
-/* Description: Base address to the CCB in the DDR memory space. aligned to minimum page size of 512B. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__WIDTH (26)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__MASK (0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x03FFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | (((uint32_t)(value) << 0) & 0x03FFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL) | 0x03FFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMBASEADDR__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x03FFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCMEMCCBSIZE : val */
-/* Description: The CCB size minus one in page size granularity. 0 - 1 desc 1 - 2 desc ... N_CCB_MAX_DESC-1 - N_CCB_MAX_DESC desc. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCMEMCCBSIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCPAGESIZE : val */
-/* Description: M2D Memory page size. Valid values are: 0 - 512B,1 - 1KB,2 - 2KB,3 - 4KB,4 - 1536B. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCSIMPPATTERNNUMPAGES : val */
-/* Description: Number of pages per line (simplified pattern has single line/pattern). */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNNUMPAGES__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCSIMPPATTERNRESIDUEPAGESIZE : val */
-/* Description: Residue page size in 8B granularity,minus one,per pattern. 0-8B,1-16B,...,511-4kB */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__WIDTH (9)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__MASK (0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000001FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | (((uint32_t)(value) << 0) & 0x000001FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL) | 0x000001FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCSIMPPATTERNRESIDUEPAGESIZE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000001FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCBANKINTERLEAVEMODE : val */
-/* Description: Select the bank interleave mode: 2'd0 - interleave 8 banks (default),2'd1 - Interleave 4 banks,2'd2 - Interleave 2 banks,2'd3 - no interleave. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__WIDTH (2)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__MASK (0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000003L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | (((uint32_t)(value) << 0) & 0x00000003L))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L) | 0x00000003L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCBANKINTERLEAVEMODE__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000003L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCDESCPERIPHINTERRUPT : val */
-/* Description: When > 0 the QSMC will interrupt the peripheral every read QsmcDescPeriphInterrupt descriptors. */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | (((uint32_t)(value) << 0) & 0x0003FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL) | 0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCDESCPERIPHINTERRUPT__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x0003FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMCCCBFREEINDEX : val */
-/* Description: Used by the peripheral to indicates how many data is ready in the CCB for write (process). This is the CcbIndex (free pointer in CCB). */
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__WIDTH (18)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__MASK (0x0003FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMCCCBFREEINDEX__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x0003FFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_MASK : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_STATUS : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-
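The CS interrupt registers pack M2D channels into bits [15:0] and D2M channels into bits [31:16]. A small sketch of building a per-channel bit under that layout; the 16-channels-per-direction bound is an assumption read off the bit split:

#include <stdint.h>

/* Bit position for a channel in the ENGINE_CS_INTR_* registers. */
static uint32_t engine_cs_intr_bit(unsigned channel, int is_d2m)
{
    return 1u << (is_d2m ? (16u + channel) : channel);  /* channel < 16 assumed */
}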
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_W1C : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_CS_INTR_W1S : val */
-/* Description: INT register bits[15:0] per M2D channel,indicating one of the following events: a. Internal desc - QSMC processed last CCB descriptor. Implemented by set the interrupt when CCB-free-index is wrapped (become zero),might be used for CONF channel - to indicates conf is done. bits[31:16] per D2M channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescCsInterrupt (OR) External desc - domain#0 (local) source/destination event. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_CS_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
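The MASK/STATUS/W1C/W1S quartet above implies the usual acknowledge discipline: read the latched status, handle it, then write the same bits to the W1C register to clear them. A sketch with placeholder register pointers:

#include <stdint.h>

/* Read-and-acknowledge sketch for the engine CS interrupt. */
static uint32_t engine_cs_take_status(volatile uint32_t *cs_status_reg,
                                      volatile uint32_t *cs_w1c_reg)
{
    uint32_t status = *cs_status_reg;  /* latched per-channel events */
    *cs_w1c_reg = status;              /* write-1-to-clear what we observed */
    return status;
}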
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_MASK : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_STATUS : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_W1C : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_AP_INTR_W1S : val */
-/* Description: INT register bit per direction/channel indicating one of the following events: Internal desc - QDMC processed descriptors per QdmcDescPeriphInterrupt (D2M enhanced channels only) (OR) Internal desc - QSMC processed descriptors per QsmcDescPeriphInterrupt (M2D enhanced channels only) (OR) External desc - domain#1 (host) source/destination event */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_AP_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_MASK : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_MASK__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_STATUS : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_STATUS__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_W1C : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1C__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_DSP_INTR_W1S : val */
-/* Description: INT register */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_DSP_INTR_W1S__VAL__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ERR_INTR_MASK : desc_err */
-/* Description: Summary of desc_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__DESC_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* ENGINE_ERR_INTR_MASK : qddc_crd_ovf_err */
-/* Description: Summary of qddc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QDDC_CRD_OVF_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1))
-
-/* ENGINE_ERR_INTR_MASK : qsdc_crd_ovf_err */
-/* Description: Summary of qsdc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__SHIFT (2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__MASK (0x00000004L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000004L) >> 2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | (((uint32_t)(value) << 2) & 0x00000004L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | ((uint32_t)(1) << 2))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_MASK__QSDC_CRD_OVF_ERR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000004L) | ((uint32_t)(0) << 2))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ERR_INTR_STATUS : desc_err */
-/* Description: Summary of desc_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-
-/* ENGINE_ERR_INTR_STATUS : qddc_crd_ovf_err */
-/* Description: Summary of qddc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-
-/* ENGINE_ERR_INTR_STATUS : qsdc_crd_ovf_err */
-/* Description: Summary of qsdc_crd_ovf_err_intr register. */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__SHIFT (2)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__MASK (0x00000004L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000004L) >> 2)
-
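ENGINE_ERR_INTR_STATUS is a summary register: each bit says which detail register (desc_err_intr, qddc_crd_ovf_err_intr, qsdc_crd_ovf_err_intr) latched an error. A fan-out sketch using the READ macros above; the pointer and the handling stubs are placeholders:

#include <stdint.h>

static void engine_err_fanout(volatile uint32_t *err_status_reg)
{
    uint32_t status = *err_status_reg;

    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__DESC_ERR__READ(status)) {
        /* consult DESC_ERR_INTR_STATUS for the failing DESC_STATUS bits */
    }
    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QDDC_CRD_OVF_ERR__READ(status)) {
        /* consult qddc_crd_ovf_err_intr for the offending QDDC channel */
    }
    if (DRAM_DMA_ENGINE_CONFIG__ENGINE_ERR_INTR_STATUS__QSDC_CRD_OVF_ERR__READ(status)) {
        /* consult qsdc_crd_ovf_err_intr for the offending QSDC channel */
    }
}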
-/*----------------------------------------------------------------------------------------------------*/
-/* DESC_ERR_INTR_MASK : DescStatus */
-/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DESCSTATUS__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/* DESC_ERR_INTR_MASK : RemainPageSize */
-/* Description: non-zero REMAINING_PAGE_SIZE. Refer to EngErrInterruptSource register for the error origin. Refer to EngErrRemainPageSize register for the returned value. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__SHIFT (8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__MASK (0x00000100L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000100L) >> 8)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | (((uint32_t)(value) << 8) & 0x00000100L))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(1) << 8))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__REMAINPAGESIZE__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(0) << 8))
-
-/* DESC_ERR_INTR_MASK : SrcDescWdataPar */
-/* Description: Source descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__SHIFT (9)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__MASK (0x00000200L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__READ(reg_offset) \
-    (((uint32_t)(reg_offset) & 0x00000200L) >> 9)
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__MODIFY(reg_offset, value) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | (((uint32_t)(value) << 9) & 0x00000200L))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__SET(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | ((uint32_t)(1) << 9))
-#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__SRCDESCWDATAPAR__CLR(reg_offset) \
-    (reg_offset) = (((reg_offset) & ~0x00000200L) | ((uint32_t)(0) << 9))
-
-/* DESC_ERR_INTR_MASK : DstDescWdataPar */
-/* Description: Destination descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */
*/ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__SHIFT (10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__MASK (0x00000400L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000400L) >> 10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | (((uint32_t)(value) << 10) & 0x00000400L)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | ((uint32_t)(1) << 10)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_MASK__DSTDESCWDATAPAR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000400L) | ((uint32_t)(0) << 10)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_STATUS : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) - -/* DESC_ERR_INTR_STATUS : RemainPageSize */ -/* Description: non-zero REMAINING_PAGE_SIZE. Refer to EngErrInterruptSource register for the error origin. Refer to EngErrRemainPageSize register for the returned value. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__SHIFT (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__MASK (0x00000100L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__REMAINPAGESIZE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000100L) >> 8) - -/* DESC_ERR_INTR_STATUS : SrcDescWdataPar */ -/* Description: Source descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__SHIFT (9) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__MASK (0x00000200L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__SRCDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000200L) >> 9) - -/* DESC_ERR_INTR_STATUS : DstDescWdataPar */ -/* Description: Destination descriptor complete with error status. Refer to EngErrInterruptSource register for the error origin. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__SHIFT (10) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__MASK (0x00000400L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_STATUS__DSTDESCWDATAPAR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000400L) >> 10) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_W1C : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1C__DESCSTATUS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DESC_ERR_INTR_W1S : DescStatus */ -/* Description: Interrupt bit per DESC_STATUS fields of vDMA descriptor which returned unexpected value (Note that successful descriptor returns status of 8'h1). Refer to EngErrInterruptSource register for the error origin. */ -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__WIDTH (8) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__MASK (0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000000FFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL)) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL) -#define DRAM_DMA_ENGINE_CONFIG__DESC_ERR_INTR_W1S__DESCSTATUS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000000FFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_MASK : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_MASK__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_STATUS : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_STATUS__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_W1C : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1C__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QDDC_CRD_OVF_ERR_INTR_W1S : ch */ -/* Description: Interrupt bit per QDDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QDDC_CRD_OVF_ERR_INTR_W1S__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_MASK : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_MASK__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_STATUS : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_STATUS__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_W1C : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1C__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* QSDC_CRD_OVF_ERR_INTR_W1S : ch */ -/* Description: Interrupt bit per QSDC channel indicating overflow or underflow in Core credit counter. */ -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__MASK (0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL) -#define DRAM_DMA_ENGINE_CONFIG__QSDC_CRD_OVF_ERR_INTR_W1S__CH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000FFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGERRINTERRUPTSOURCE : ChannelID */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__MASK (0x0000000FL) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__CHANNELID__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000000FL) >> 0) - -/* ENGERRINTERRUPTSOURCE : Direction */ -/* Description: 0 - Destination. 1 - Source. */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__SHIFT (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__MASK (0x00000010L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DIRECTION__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000010L) >> 4) - -/* ENGERRINTERRUPTSOURCE : Domain */ -/* Description: 0 - Device. 1 - Memory. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__MASK (0x00000020L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRINTERRUPTSOURCE__DOMAIN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000020L) >> 5) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGERRREMAINPAGESIZE : val */ -/* Description: In case of non-zero REMAINING_PAGE_SIZE this register holds the latched value until cleared by writing to this register */ -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__WIDTH (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__MASK (0x00FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGERRREMAINPAGESIZE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00FFFFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGTRANSFERPAGESIZE : size */ -/* Description: TRANSFERRED_PAGE_SIZE value of last descriptor write to QDMC */ -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__WIDTH (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__MASK (0x00FFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__SIZE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00FFFFFFL) >> 0) - -/* ENGTRANSFERPAGESIZE : ch_id */ -/* Description: QDMC Channel ID */ -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__SHIFT (24) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__MASK (0x0F000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGTRANSFERPAGESIZE__CH_ID__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0F000000L) >> 24) - -/*----------------------------------------------------------------------------------------------------*/ -/* VDMASOFTRESET : val */ -/* Description: Apply soft reset to vDMA. Must be cleared in order to release vDMA from soft reset. 
*/
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* VDMASOFTRESET : par */
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__SHIFT (31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__MASK (0x80000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x80000000L) >> 31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | (((uint32_t)(value) << 31) & 0x80000000L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(1) << 31))
-#define DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__PAR__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(0) << 31))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* VDMA_SHAREDBUS : cs_mask */
-/* Description: Bit mask on vDMA Sharedbus interrupt source for CS */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__MASK (0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__RESET (0x0000000AL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000000FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | (((uint32_t)(value) << 0) & 0x0000000FL))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | 0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__CS_MASK__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL))
-
-/* VDMA_SHAREDBUS : ap_mask */
-/* Description: Bit mask on vDMA Sharedbus interrupt source for AP */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__SHIFT (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__MASK (0x000000F0L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__RESET (0x00000050L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000F0L) >> 4)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L) | (((uint32_t)(value) << 4) & 0x000000F0L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L) | 0x000000F0L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_SHAREDBUS__AP_MASK__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000F0L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QDDC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDDC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QSDC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSDC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QDMC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
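Taken together, these blocks follow one access pattern: each field gets SHIFT/WIDTH/MASK/RESET constants plus READ/MODIFY/SET/CLR macros that operate on a register value held in a local variable (the parameter is named reg_offset, but it is the value, not an address), so every register update is a read-modify-write through the platform's MMIO layer. A minimal usage sketch for the soft-reset sequence described above; mmio_read32/mmio_write32 and VDMASOFTRESET_OFFSET are hypothetical stand-ins for helpers this diff does not show:

#include <stdint.h>

/* Hypothetical register-access helpers; not part of this header. */
extern uint32_t mmio_read32(uintptr_t addr);
extern void mmio_write32(uintptr_t addr, uint32_t value);

static void vdma_soft_reset_pulse(uintptr_t engine_base)
{
    uint32_t reg = mmio_read32(engine_base + VDMASOFTRESET_OFFSET); /* hypothetical offset */
    DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__SET(reg);  /* assert soft reset */
    mmio_write32(engine_base + VDMASOFTRESET_OFFSET, reg);
    DRAM_DMA_ENGINE_CONFIG__VDMASOFTRESET__VAL__CLR(reg);  /* must be cleared to release the vDMA */
    mmio_write32(engine_base + VDMASOFTRESET_OFFSET, reg);
}

The same read-modify-write shape applies to every register in this file; only the field macros change. The sketches below reuse the same hypothetical helpers.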
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QDMC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* CFG_QSMC_REDUNDANT_EN : val */
-/* Description: Redundancy mode enable bit per QM pair. Bit i makes QM[i*2+1] a redundancy for QM[i*2] */
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__CFG_QSMC_REDUNDANT_EN__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
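The *_REDUNDANT_ASF_INT_* registers come in MASK/STATUS/W1C/W1S banks: STATUS is a read-only snapshot, W1C acknowledges by writing ones, and W1S can raise the bits for test purposes. A sketch of servicing a QDDC mismatch interrupt, under the same hypothetical helpers and a hypothetical register offset:

static void qddc_redundant_mismatch_isr(uintptr_t engine_base)
{
    uint32_t status = mmio_read32(engine_base + QDDC_REDUNDANT_ASF_INT_STATUS_OFFSET);
    uint32_t pending = DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_STATUS__VAL__READ(status);
    if (pending != 0) {
        uint32_t w1c = 0;
        /* write-1-to-clear: only the bits written as 1 are acknowledged */
        DRAM_DMA_ENGINE_CONFIG__QDDC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(w1c, pending);
        mmio_write32(engine_base + QDDC_REDUNDANT_ASF_INT_W1C_OFFSET, w1c);
    }
}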
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_MASK : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_MASK__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_STATUS : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_STATUS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_W1C : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1C__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_REDUNDANT_ASF_INT_W1S : val */
-/* Description: Redundancy mode compare mismatch for QM pair i */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__WIDTH (8)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__MASK (0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x000000FFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | (((uint32_t)(value) << 0) & 0x000000FFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL) | 0x000000FFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_REDUNDANT_ASF_INT_W1S__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x000000FFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* PRIOISLP : val */
-/* Description: Indicates the channel priority is low. */
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__WIDTH (32)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MASK (0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | (((uint32_t)(value) << 0) & 0xFFFFFFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL) | 0xFFFFFFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0xFFFFFFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* READLPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for low priority read. */
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READLPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* READHPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for high priority read. */
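PRIOISLP is a plain 32-bit bitmap, one bit per channel, with 1 marking the channel as low priority. A sketch, using the same hypothetical helpers and a hypothetical PRIOISLP_OFFSET, that demotes a single channel:

static void channel_set_low_priority(uintptr_t engine_base, unsigned channel)
{
    uint32_t reg = mmio_read32(engine_base + PRIOISLP_OFFSET);
    uint32_t bitmap = DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__READ(reg);
    DRAM_DMA_ENGINE_CONFIG__PRIOISLP__VAL__MODIFY(reg, bitmap | (1u << channel));
    mmio_write32(engine_base + PRIOISLP_OFFSET, reg);
}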
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__READHPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* WRITELPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for low priority write. */
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITELPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* WRITEHPTOQOSVALUE : val */
-/* Description: The QOS toward DDR-AXI master for high priority write. */
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* DESCREADQOSVALUE : val */
-/* Description: The QOS toward DDR-desc-AXI master for read. */
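The *TOQOSVALUE fields are 3-bit AXI QOS codes; the LP variants reset to 0 and the HP variants to 2. A sketch (same hypothetical helpers; the value 4 and WRITEHPTOQOSVALUE_OFFSET are illustrative assumptions) that raises the high-priority write QOS:

static void raise_hp_write_qos(uintptr_t engine_base)
{
    uint32_t reg = mmio_read32(engine_base + WRITEHPTOQOSVALUE_OFFSET);
    DRAM_DMA_ENGINE_CONFIG__WRITEHPTOQOSVALUE__VAL__MODIFY(reg, 0x4); /* any 3-bit QOS code */
    mmio_write32(engine_base + WRITEHPTOQOSVALUE_OFFSET, reg);
}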
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCREADQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* DESCWRITEQOSVALUE : val */
-/* Description: The QOS toward DDR-desc-AXI master for write. */
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__WIDTH (3)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__MASK (0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000007L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | (((uint32_t)(value) << 0) & 0x00000007L))
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L) | 0x00000007L)
-#define DRAM_DMA_ENGINE_CONFIG__DESCWRITEQOSVALUE__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000007L))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* VDMA_ARB : prio_en */
-/* Description: Enable 2-level priority-based channel arbitration in vDMA */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__RESET (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/* VDMA_ARB : interleave_en */
-/* Description: Enable interleaving of the arbitration order between M2D and D2M channels */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__SHIFT (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__MASK (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__RESET (0x00000002L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000002L) >> 1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1))
-
-/* VDMA_ARB : par */
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__SHIFT (31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__MASK (0x80000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x80000000L) >> 31)
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | (((uint32_t)(value) << 31) & 0x80000000L))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(1) << 31))
-#define DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PAR__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x80000000L) | ((uint32_t)(0) << 31))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QM_CFG_CG_DELAY : val */
-/* Description: Clock cycles to keep the clock running after the enable condition is met */
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__WIDTH (4)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__MASK (0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__RESET (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000000FL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | (((uint32_t)(value) << 0) & 0x0000000FL))
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL) | 0x0000000FL)
-#define DRAM_DMA_ENGINE_CONFIG__QM_CFG_CG_DELAY__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000000FL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDDC_CFG_CG_BYPASS : val */
-/* Description: Bypass QDDC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDDC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSDC_CFG_CG_BYPASS : val */
-/* Description: Bypass QSDC CG */
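VDMA_ARB packs two independent arbitration enables, both set out of reset. A sketch (same hypothetical helpers; VDMA_ARB_OFFSET is assumed) that keeps priority arbitration but turns off M2D/D2M interleaving; maintenance of the par bit at 31 is deliberately omitted here:

static void vdma_arb_priority_only(uintptr_t engine_base)
{
    uint32_t arb = mmio_read32(engine_base + VDMA_ARB_OFFSET);
    DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__PRIO_EN__SET(arb);       /* keep 2-level priority arbitration */
    DRAM_DMA_ENGINE_CONFIG__VDMA_ARB__INTERLEAVE_EN__CLR(arb); /* stop interleaving M2D and D2M */
    mmio_write32(engine_base + VDMA_ARB_OFFSET, arb);
}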
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSDC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QDMC_CFG_CG_BYPASS : val */
-/* Description: Bypass QDMC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QDMC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* QSMC_CFG_CG_BYPASS : val */
-/* Description: Bypass QSMC CG */
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__WIDTH (16)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__MASK (0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x0000FFFFL) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | (((uint32_t)(value) << 0) & 0x0000FFFFL))
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL) | 0x0000FFFFL)
-#define DRAM_DMA_ENGINE_CONFIG__QSMC_CFG_CG_BYPASS__VAL__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x0000FFFFL))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_MASK : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_MASK__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_STATUS : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_STATUS__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_W1C : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1C__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
-
-/*----------------------------------------------------------------------------------------------------*/
-/* ENGINE_ASF_INT_W1S : parity_error_in_regfile */
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__SHIFT (0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__WIDTH (1)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__MASK (0x00000001L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__RESET (0x00000000L)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__READ(reg_offset) \
- (((uint32_t)(reg_offset) & 0x00000001L) >> 0)
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__MODIFY(reg_offset, value) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__SET(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0))
-#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \
- (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0))
(reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_ASF_INT_W1S__PARITY_ERROR_IN_REGFILE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* ENGINE_RW_PARITY_BIST_MODE : val */ -/* Description: write 1 if want to work in rw_parity bist mode in which the parity bit is written by APB wdata and not from HW calculation */ -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__ENGINE_RW_PARITY_BIST_MODE__VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/*----------------------------------------------------------------------------------------------------*/ -/* VDMA_STOP_LP : dis */ -/* Description: Write 1 if want to disable LP Stop feature */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__DIS__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* VDMA_STOP_LP : force_val */ -/* Description: Force Stop LP state when feature is enabled */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_STOP_LP__FORCE_VAL__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - 
-/*----------------------------------------------------------------------------------------------------*/ -/* VDMA_SCH : stop_th */ -/* Description: Stop scheduling for this many cycles after each successful allocation */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__WIDTH (7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__MASK (0x0000007FL) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__RESET (0x00000007L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000007FL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL) | (((uint32_t)(value) << 0) & 0x0000007FL)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL) | 0x0000007FL) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_TH__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000007FL)) - -/* VDMA_SCH : stop_en */ -/* Description: Enable periodic scheduling stopping mechanism */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__SHIFT (7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__MASK (0x00000080L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__RESET (0x00000080L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000080L) >> 7) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | (((uint32_t)(value) << 7) & 0x00000080L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | ((uint32_t)(1) << 7)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__STOP_EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000080L) | ((uint32_t)(0) << 7)) - -/* VDMA_SCH : tsf24_mode */ -/* Description: Apply fix to increase maximum transfers to 24 */ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__SHIFT (8) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__MASK (0x00000100L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000100L) >> 8) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | (((uint32_t)(value) << 8) & 0x00000100L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(1) << 8)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF24_MODE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000100L) | ((uint32_t)(0) << 8)) - -/* VDMA_SCH : tsf_af_threshold */ -/* Description: Almost Full at 13 allocated TSF (12+8=20). In tsf24_mode should be set to 12. 
*/ -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__SHIFT (9) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__WIDTH (5) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__MASK (0x00003E00L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__RESET (0x00002800L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00003E00L) >> 9) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L) | (((uint32_t)(value) << 9) & 0x00003E00L)) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L) | 0x00003E00L) -#define DRAM_DMA_ENGINE_CONFIG__VDMA_SCH__TSF_AF_THRESHOLD__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00003E00L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_SRC_DESC_TRACE : en */ -/* Description: Enable tracing of descriptors read from Source QMs */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_SRC_DESC_TRACE : stop_on_wrap */ -/* Description: Stop when reaching end of tracing buffer */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__STOP_ON_WRAP__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/* CFG_SRC_DESC_TRACE : mprot */ -/* Description: AWPROT value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__SHIFT (2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__MASK (0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001CL) >> 2) -#define 
DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | (((uint32_t)(value) << 2) & 0x0000001CL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | 0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MPROT__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL)) - -/* CFG_SRC_DESC_TRACE : mcache */ -/* Description: AWCACHE value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__MASK (0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__RESET (0x00000020L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000001E0L) >> 5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | (((uint32_t)(value) << 5) & 0x000001E0L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | 0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__MCACHE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L)) - -/* CFG_SRC_DESC_TRACE : buff_size_m1 */ -/* Description: Buffer size minus 1 in 16B descriptors */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__SHIFT (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__MASK (0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFF0000L) >> 16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | (((uint32_t)(value) << 16) & 0xFFFF0000L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | 0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE__BUFF_SIZE_M1__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_SRC_DESC_TRACE_BASE_ADDR : base_addr */ -/* Description: Buffer base address bits 34:4 aligned to 16B */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__WIDTH (31) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__MASK (0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x7FFFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | (((uint32_t)(value) << 0) & 0x7FFFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | 0x7FFFFFFFL) -#define 
DRAM_DMA_ENGINE_CONFIG__CFG_SRC_DESC_TRACE_BASE_ADDR__BASE_ADDR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DST_DESC_TRACE : en */ -/* Description: Enable tracing of descriptors read from Source QMs */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_DST_DESC_TRACE : stop_on_wrap */ -/* Description: Stop when reaching end of tracing buffer */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__STOP_ON_WRAP__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/* CFG_DST_DESC_TRACE : mprot */ -/* Description: AWPROT value */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__SHIFT (2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__WIDTH (3) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__MASK (0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x0000001CL) >> 2) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | (((uint32_t)(value) << 2) & 0x0000001CL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL) | 0x0000001CL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MPROT__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x0000001CL)) - -/* CFG_DST_DESC_TRACE : mcache */ -/* Description: AWCACHE value. MER-3804 ECO: Note that bit 3 is double booked for timeout ExtRef default value which needs to be 1. 
In case debug tracing is enabled */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__SHIFT (5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__WIDTH (4) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__MASK (0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__RESET (0x00000120L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x000001E0L) >> 5) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | (((uint32_t)(value) << 5) & 0x000001E0L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L) | 0x000001E0L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__MCACHE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x000001E0L)) - -/* CFG_DST_DESC_TRACE : buff_size_m1 */ -/* Description: Buffer size minus 1 in 16B descriptors */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__SHIFT (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__WIDTH (16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__MASK (0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFF0000L) >> 16) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | (((uint32_t)(value) << 16) & 0xFFFF0000L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L) | 0xFFFF0000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE__BUFF_SIZE_M1__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0xFFFF0000L)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DST_DESC_TRACE_BASE_ADDR : base_addr */ -/* Description: Buffer base address bits 34:4 aligned to 16B. MER-3804 ECO: Note that bits 17:16 are double booked for timeout ExtRef mux. 
In case debug tracing and ExtRef are required to be turned on this constrain the base address bits 17:16 to be the same as the timestamp mux */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__WIDTH (31) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__MASK (0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x7FFFFFFFL) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | (((uint32_t)(value) << 0) & 0x7FFFFFFFL)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL) | 0x7FFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DST_DESC_TRACE_BASE_ADDR__BASE_ADDR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x7FFFFFFFL)) - -/*----------------------------------------------------------------------------------------------------*/ -/* CFG_DEBUG_TIMESTAMP : en */ -/* Description: Write 1 to enable timestamp counter for debug logic */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__EN__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - -/* CFG_DEBUG_TIMESTAMP : clr */ -/* Description: Write 1 to clear timestamp counter. 
After writing 1 to this field need to write 0 immediately */ -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__SHIFT (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__MASK (0x00000002L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000002L) >> 1) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | (((uint32_t)(value) << 1) & 0x00000002L)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(1) << 1)) -#define DRAM_DMA_ENGINE_CONFIG__CFG_DEBUG_TIMESTAMP__CLR__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000002L) | ((uint32_t)(0) << 1)) - -/*----------------------------------------------------------------------------------------------------*/ -/* DEBUG_TIMESTAMP : val */ -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__WIDTH (32) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__MASK (0xFFFFFFFFL) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__RESET (0x00000000L) -#define DRAM_DMA_ENGINE_CONFIG__DEBUG_TIMESTAMP__VAL__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0xFFFFFFFFL) >> 0) - -/*----------------------------------------------------------------------------------------------------*/ -/* AUTO_ADDRESS_ERR_CB_INDICATION : enable */ -/* Description: default is 1, meaning the address error is enabled, to hide the address error indication, set to 0 */ -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__SHIFT (0) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__WIDTH (1) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__MASK (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__RESET (0x00000001L) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__READ(reg_offset) \ - (((uint32_t)(reg_offset) & 0x00000001L) >> 0) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__MODIFY(reg_offset, value) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | (((uint32_t)(value) << 0) & 0x00000001L)) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__SET(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(1) << 0)) -#define DRAM_DMA_ENGINE_CONFIG__AUTO_ADDRESS_ERR_CB_INDICATION__ENABLE__CLR(reg_offset) \ - (reg_offset) = (((reg_offset) & ~0x00000001L) | ((uint32_t)(0) << 0)) - - -#endif /* DRAM_DMA_ENGINE_CONFIG_MACRO_H */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h deleted file mode 100644 index 5c2c014b..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_engine_config_regs.h +++ /dev/null @@ -1,143 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written license from Hailotech. 
--------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_ENGINE_CONFIG_REGS_H -#define DRAM_DMA_ENGINE_CONFIG_REGS_H - -#include "dram_dma_package_macros.h" -#include "dram_dma_engine_config_macros.h" - -typedef struct DRAM_DMA_ENGINE_CONFIG_regs_s { - volatile uint32_t QddcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x0 ; repeat: [16] */ - volatile uint32_t QddcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x40 ; repeat: [16] */ - volatile uint32_t QddcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x80 ; repeat: [16] */ - volatile uint32_t QddcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0xc0 ; repeat: [16] */ - volatile uint32_t QddcMaxDesc[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x100 ; repeat: [16] */ - volatile uint32_t QddcShmifoId[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x140 ; repeat: [16] */ - volatile uint32_t QddcShmifoCreditSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x180 ; repeat: [16] */ - volatile uint32_t QddcShmifoInitCredit[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x1c0 ; repeat: [16] */ - volatile uint32_t QsdcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x200 ; repeat: [16] */ - volatile uint32_t QsdcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x240 ; repeat: [16] */ - volatile uint32_t QsdcMaxDesc[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x280 ; repeat: [16] */ - volatile uint32_t QsdcShmifoId[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x2c0 ; repeat: [16] */ - volatile uint32_t QsdcShmifoCreditSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x300 ; repeat: [16] */ - volatile uint32_t QsdcFullNumPatterns[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN]; /* offset: 0x340 ; repeat: [4] */ - volatile uint32_t QsdcFullPatternNumLines[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x350 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x390 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x3d0 ; repeat: [4, 4] */ - volatile uint32_t QsdcFullPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN][DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS];/* offset: 0x410 ; repeat: [4, 4] */ - volatile uint32_t QsdcSimpPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x450 ; repeat: [12] */ - volatile uint32_t QsdcSimpPatternPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x480 ; repeat: [12] */ - volatile uint32_t QsdcSimpPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN]; /* offset: 0x4b0 ; repeat: [12] */ - volatile uint32_t QdmcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x4e0 ; repeat: [16] */ - volatile uint32_t QdmcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x520 ; repeat: [16] */ - volatile uint32_t QdmcMemBaseAddr[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x560 ; repeat: [16] */ - volatile uint32_t QdmcMemCcbSizeLog2[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH]; /* offset: 0x5a0 ; repeat: [12] */ - volatile 
uint32_t QdmcDescCsInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x5d0 ; repeat: [16] */ - volatile uint32_t QdmcBankInterleaveMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x610 ; repeat: [16] */ - volatile uint32_t QdmcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x650 ; repeat: [4] */ - volatile uint32_t QdmcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x660 ; repeat: [4] */ - volatile uint32_t QdmcMemCcbSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x670 ; repeat: [4] */ - volatile uint32_t QdmcDescPeriphInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x680 ; repeat: [4] */ - volatile uint32_t QdmcCcbProcessedIndex[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x690 ; repeat: [4] */ - volatile uint32_t QsmcEnable[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x6a0 ; repeat: [16] */ - volatile uint32_t QsmcReset[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x6e0 ; repeat: [16] */ - volatile uint32_t QsmcMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x720 ; repeat: [16] */ - volatile uint32_t QsmcC2cSel[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x760 ; repeat: [16] */ - volatile uint32_t QsmcAddBurstVal[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x7a0 ; repeat: [16] */ - volatile uint32_t QsmcMemBaseAddr[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x7e0 ; repeat: [16] */ - volatile uint32_t QsmcMemCcbSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x820 ; repeat: [16] */ - volatile uint32_t QsmcPageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x860 ; repeat: [16] */ - volatile uint32_t QsmcSimpPatternNumPages[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x8a0 ; repeat: [16] */ - volatile uint32_t QsmcSimpPatternResiduePageSize[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x8e0 ; repeat: [16] */ - volatile uint32_t QsmcBankInterleaveMode[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH]; /* offset: 0x920 ; repeat: [16] */ - volatile uint32_t QsmcDescPeriphInterrupt[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x960 ; repeat: [4] */ - volatile uint32_t QsmcCcbFreeIndex[DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH]; /* offset: 0x970 ; repeat: [4] */ - volatile uint32_t engine_cs_intr_mask; /* offset: 0x980 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_status; /* offset: 0x984 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_w1c; /* offset: 0x988 ; repeat: [1] */ - volatile uint32_t engine_cs_intr_w1s; /* offset: 0x98c ; repeat: [1] */ - volatile uint32_t engine_ap_intr_mask; /* offset: 0x990 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_status; /* offset: 0x994 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_w1c; /* offset: 0x998 ; repeat: [1] */ - volatile uint32_t engine_ap_intr_w1s; /* offset: 0x99c ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_mask; /* offset: 0x9a0 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_status; /* offset: 0x9a4 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_w1c; /* offset: 0x9a8 ; repeat: [1] */ - volatile uint32_t engine_dsp_intr_w1s; /* offset: 0x9ac ; repeat: [1] */ - volatile uint32_t engine_err_intr_mask; /* offset: 0x9b0 ; repeat: [1] */ - volatile uint32_t engine_err_intr_status; /* offset: 0x9b4 ; repeat: [1] */ - volatile uint32_t desc_err_intr_mask; /* offset: 0x9b8 ; repeat: [1] */ - volatile uint32_t 
desc_err_intr_status; /* offset: 0x9bc ; repeat: [1] */ - volatile uint32_t desc_err_intr_w1c; /* offset: 0x9c0 ; repeat: [1] */ - volatile uint32_t desc_err_intr_w1s; /* offset: 0x9c4 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_mask; /* offset: 0x9c8 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_status; /* offset: 0x9cc ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_w1c; /* offset: 0x9d0 ; repeat: [1] */ - volatile uint32_t qddc_crd_ovf_err_intr_w1s; /* offset: 0x9d4 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_mask; /* offset: 0x9d8 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_status; /* offset: 0x9dc ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_w1c; /* offset: 0x9e0 ; repeat: [1] */ - volatile uint32_t qsdc_crd_ovf_err_intr_w1s; /* offset: 0x9e4 ; repeat: [1] */ - volatile uint32_t EngErrInterruptSource; /* offset: 0x9e8 ; repeat: [1] */ - volatile uint32_t EngErrRemainPageSize; /* offset: 0x9ec ; repeat: [1] */ - volatile uint32_t EngTransferPageSize; /* offset: 0x9f0 ; repeat: [1] */ - volatile uint32_t VdmaSoftReset; /* offset: 0x9f4 ; repeat: [1] */ - volatile uint32_t vdma_sharedbus; /* offset: 0x9f8 ; repeat: [1] */ - volatile uint32_t cfg_qddc_redundant_en; /* offset: 0x9fc ; repeat: [1] */ - volatile uint32_t cfg_qsdc_redundant_en; /* offset: 0xa00 ; repeat: [1] */ - volatile uint32_t cfg_qdmc_redundant_en; /* offset: 0xa04 ; repeat: [1] */ - volatile uint32_t cfg_qsmc_redundant_en; /* offset: 0xa08 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_mask; /* offset: 0xa0c ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_status; /* offset: 0xa10 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_w1c; /* offset: 0xa14 ; repeat: [1] */ - volatile uint32_t qddc_redundant_asf_int_w1s; /* offset: 0xa18 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_mask; /* offset: 0xa1c ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_status; /* offset: 0xa20 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_w1c; /* offset: 0xa24 ; repeat: [1] */ - volatile uint32_t qsdc_redundant_asf_int_w1s; /* offset: 0xa28 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_mask; /* offset: 0xa2c ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_status; /* offset: 0xa30 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_w1c; /* offset: 0xa34 ; repeat: [1] */ - volatile uint32_t qdmc_redundant_asf_int_w1s; /* offset: 0xa38 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_mask; /* offset: 0xa3c ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_status; /* offset: 0xa40 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_w1c; /* offset: 0xa44 ; repeat: [1] */ - volatile uint32_t qsmc_redundant_asf_int_w1s; /* offset: 0xa48 ; repeat: [1] */ - volatile uint32_t PrioIsLp; /* offset: 0xa4c ; repeat: [1] */ - volatile uint32_t ReadLpToQosValue; /* offset: 0xa50 ; repeat: [1] */ - volatile uint32_t ReadHpToQosValue; /* offset: 0xa54 ; repeat: [1] */ - volatile uint32_t WriteLpToQosValue; /* offset: 0xa58 ; repeat: [1] */ - volatile uint32_t WriteHpToQosValue; /* offset: 0xa5c ; repeat: [1] */ - volatile uint32_t DescReadQosValue; /* offset: 0xa60 ; repeat: [1] */ - volatile uint32_t DescWriteQosValue; /* offset: 0xa64 ; repeat: [1] */ - volatile uint32_t vdma_arb; /* offset: 0xa68 ; repeat: [1] */ - volatile uint32_t qm_cfg_cg_delay; /* offset: 0xa6c ; repeat: [1] */ - volatile uint32_t qddc_cfg_cg_bypass; /* offset: 0xa70 ; 
repeat: [1] */ - volatile uint32_t qsdc_cfg_cg_bypass; /* offset: 0xa74 ; repeat: [1] */ - volatile uint32_t qdmc_cfg_cg_bypass; /* offset: 0xa78 ; repeat: [1] */ - volatile uint32_t qsmc_cfg_cg_bypass; /* offset: 0xa7c ; repeat: [1] */ - volatile uint32_t engine_asf_int_mask; /* offset: 0xa80 ; repeat: [1] */ - volatile uint32_t engine_asf_int_status; /* offset: 0xa84 ; repeat: [1] */ - volatile uint32_t engine_asf_int_w1c; /* offset: 0xa88 ; repeat: [1] */ - volatile uint32_t engine_asf_int_w1s; /* offset: 0xa8c ; repeat: [1] */ - volatile uint32_t engine_rw_parity_bist_mode; /* offset: 0xa90 ; repeat: [1] */ - volatile uint32_t vdma_stop_lp; /* offset: 0xa94 ; repeat: [1] */ - volatile uint32_t vdma_sch; /* offset: 0xa98 ; repeat: [1] */ - volatile uint32_t cfg_src_desc_trace; /* offset: 0xa9c ; repeat: [1] */ - volatile uint32_t cfg_src_desc_trace_base_addr; /* offset: 0xaa0 ; repeat: [1] */ - volatile uint32_t cfg_dst_desc_trace; /* offset: 0xaa4 ; repeat: [1] */ - volatile uint32_t cfg_dst_desc_trace_base_addr; /* offset: 0xaa8 ; repeat: [1] */ - volatile uint32_t cfg_debug_timestamp; /* offset: 0xaac ; repeat: [1] */ - volatile uint32_t debug_timestamp; /* offset: 0xab0 ; repeat: [1] */ - volatile uint32_t auto_address_err_cb_indication; /* offset: 0xab4 ; repeat: [1] */ -} DRAM_DMA_ENGINE_CONFIG_t; - -#endif /* DRAM_DMA_ENGINE_CONFIG_REGS_H */ diff --git a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h b/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h deleted file mode 100644 index 705f9a50..00000000 --- a/hailort/tools/hw_debug/hw_consts/hailo15/dram_dma_package_macros.h +++ /dev/null @@ -1,92 +0,0 @@ -/*------------------------------------------------------------------------------------- -// Copyright (c) 2022 by Hailotech This model is the confidential and -// proprietary property of Hailotech and the possession or use of this -// file requires a written license from Hailotech. 
--------------------------------------------------------------------------------------*/ - - - -#include - -#ifndef DRAM_DMA_PACKAGE_MACROS_H -#define DRAM_DMA_PACKAGE_MACROS_H - -/* HW constants and parameters for package "dram_dma" */ - -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_AXI_QOS_BITS (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH_RX_CREDIT (4096) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_CH_TX_CREDIT (2048) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DD_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DIR_CH (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_DM_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_ENHANCED_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_FULL_PATTERN (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_MAX_PATTERNS (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_PATTERNS_MAX_LINES (262144) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_PATTERNS_MAX_PAGES (262144) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_REGULAR_CH (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_RX_SHMIFO (24) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SD_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SIMP_PATTERN (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SM_DESC (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SW_CH (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_SW_INT (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__N_TX_SHMIFO (20) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__PAGE_SIZE_MAX (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__PAGE_SIZE_MAX_8B (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_BURST_SIZE (29) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_BURST_SIZE_8B (26) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_C2C_SEL (6) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CCB_DESC_INDEX (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CCB_DESC_INDEX_LOG (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CFG_DATA (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_CREDIT_SIZE (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_RX_CREDIT (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CH_TX_CREDIT (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CORE_ADDR (35) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CORE_BASE_ADDR (29) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_CSR_CFG_ADDR (13) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DDR_ADDR (35) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DDR_BASE_ADDR (26) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DD_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DESC_DEMUX_ADDR (43) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DIR_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_DM_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_ENG_CFG_ADDR (14) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_MAX_PATTERNS (2) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_PATTERNS_MAX_LINES (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_PATTERNS_MAX_PAGES (18) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SD_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SHMIFO (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SM_DESC (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_SW_CH (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ADDR (64) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_DATA_DATA (64) -#define 
DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_DATA_DESC (128) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DATA0 (2) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DATA1 (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_AXI_ID_DESC (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_CFG_ADDR (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_MEM_ADDR (5) -#define DRAM_DMA_PACKAGE__DRAM_DMA_ENGINE__W_VDMA_MEM_DATA (256) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__ADDR_ALLSTRB_OFFSET (56) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__ADDR_APCMD_OFFSET (55) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__FPGA_N_HW_DMA_ENG (0) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_DESC_AXI (1) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_DMA_ENG (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_HMASTER (4) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_HW_DMA_ENG (3) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_SW_DMA_ENG (1) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_TOT_DMA_DIR_CH (48) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__N_VISION_CH (10) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CFG_ADDR (16) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CFG_DATA (32) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_CSR_CFG_ADDR (12) -#define DRAM_DMA_PACKAGE__DRAM_DMA_WRAPPER__W_TOT_DMA_DIR_CH (6) - - -#endif /* DRAM_DMA_PACKAGE_MACROS_H */ diff --git a/hailort/tools/hw_debug/main.cpp b/hailort/tools/hw_debug/main.cpp deleted file mode 100644 index dc9ecbea..00000000 --- a/hailort/tools/hw_debug/main.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/** - * @file main.cpp - * @brief Main function, and shell build for the tool. - */ - -#include "shell.hpp" -#include "readline_wrapper.hpp" -#include "memory_commands.hpp" -#include "driver_memory.hpp" - -#include "CLI/CLI.hpp" - -#include - -using namespace hailort; - -static constexpr const char *LOGO = - R"( _____ _ _ __ __ )" "\n" - R"(| __ \ | | | | \ \ / / )" "\n" - R"(| | | | ___| |__ __ _| | ___ \ V / )" "\n" - R"(| | | |/ _ \ '_ \ / _` | |/ _ \ > < )" "\n" - R"(| |__| | __/ |_) | (_| | | __// . 
\ )" "\n" - R"(|_____/ \___|_.__/ \__,_|_|\___/_/ \_\ )" "\n"; - - -static std::shared_ptr add_memory_subshell(Shell &base_shell, - const std::string &name, const std::string &short_name, std::shared_ptr mem) -{ - auto subshell = base_shell.add_subshell(name, short_name); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - subshell->add_command(std::make_unique>(mem)); - - if (!mem->get_fields().empty()) { - subshell->add_command(std::make_unique(mem)); - } - - return subshell; -} - -template -static std::shared_ptr add_driver_memory_subshell(Shell &base_shell, - const std::string &name, const std::string &short_name, - std::shared_ptr driver, MemoryType memory_type) -{ - auto mem = std::make_shared(driver, memory_type); - return add_memory_subshell(base_shell, name, short_name, mem); -} - -static std::unique_ptr create_pcie_accelerator_shell(std::shared_ptr driver_ptr) -{ - auto shell = std::make_unique("> "); - add_driver_memory_subshell(*shell, "vdma", "v", driver_ptr, MemoryType::VDMA0); - add_driver_memory_subshell(*shell, "bar0", "b0", driver_ptr, MemoryType::PCIE_BAR0); - add_driver_memory_subshell(*shell, "bar2", "b2", driver_ptr, MemoryType::PCIE_BAR2); - add_driver_memory_subshell(*shell, "bar4", "b4", driver_ptr, MemoryType::PCIE_BAR4); - add_driver_memory_subshell(*shell, "mem", "m", driver_ptr, MemoryType::DIRECT_MEMORY); - return shell; -} - -static std::unique_ptr create_vpu_shell(std::shared_ptr driver_ptr) -{ - auto shell = std::make_unique("> "); - add_driver_memory_subshell(*shell, "vdma0", "v0", driver_ptr, MemoryType::VDMA0); - add_driver_memory_subshell(*shell, "vdma1", "v1", driver_ptr, MemoryType::VDMA1); - add_driver_memory_subshell(*shell, "vdma2", "v2", driver_ptr, MemoryType::VDMA2); - add_driver_memory_subshell(*shell, "engine0", "e0", driver_ptr, MemoryType::DMA_ENGINE0); - add_driver_memory_subshell(*shell, "engine1", "e1", driver_ptr, MemoryType::DMA_ENGINE1); - add_driver_memory_subshell(*shell, "engine2", "e2", driver_ptr, MemoryType::DMA_ENGINE2); - add_driver_memory_subshell(*shell, "mem", "m", driver_ptr, MemoryType::DIRECT_MEMORY); - return shell; -} - -static std::vector get_available_device_ids() -{ - auto scan_results = HailoRTDriver::scan_devices(); - if (!scan_results) { - throw std::runtime_error("Failed scan pci"); - } - if (scan_results->empty()) { - throw std::runtime_error("No hailo devices on the system..."); - } - - std::vector device_ids; - for (const auto &scan_result : scan_results.value()) { - device_ids.push_back(scan_result.device_id); - } - return device_ids; -} - -HailoRTDriver::DeviceInfo get_device_info(const std::string &device_id) -{ - auto scan_results = HailoRTDriver::scan_devices(); - if (!scan_results) { - throw std::runtime_error("Failed scan pci"); - } - - auto device_found = std::find_if(scan_results->cbegin(), scan_results->cend(), - [&device_id](const auto &compared_scan_result) { - return device_id == compared_scan_result.device_id; - }); - if (device_found == std::end(scan_results.value())) { - throw std::runtime_error("Requested device not found"); - } - - return *device_found; -} - -std::shared_ptr create_driver_object(const std::string &device_id) -{ - auto device_info = get_device_info(device_id); - auto hailort_driver = HailoRTDriver::create(device_info); - if (!hailort_driver) { - throw std::runtime_error("Failed 
create hailort driver object"); - } - return hailort_driver.release(); -} - -int main(int argc, char **argv) -{ - try { - ReadLineWrapper::init_library(); - - auto available_device_ids = get_available_device_ids(); - - CLI::App app{"Debalex"}; - std::string device_id = available_device_ids[0]; - app.add_option("-s,--device-id", device_id, "Device id") - ->check(CLI::IsMember(available_device_ids)); - CLI11_PARSE(app, argc, argv); - - auto driver = create_driver_object(device_id); - - - auto shell = - driver->dma_type() == HailoRTDriver::DmaType::PCIE ? - create_pcie_accelerator_shell(driver) : - create_vpu_shell(driver); - - std::cout << LOGO << std::endl; - shell->run_forever(); - return 0; - } - catch (const std::exception &exc) { - std::cerr << "Failure: " << exc.what(); - return 1; - } -} diff --git a/hailort/tools/hw_debug/memory_commands.cpp b/hailort/tools/hw_debug/memory_commands.cpp deleted file mode 100644 index b753503f..00000000 --- a/hailort/tools/hw_debug/memory_commands.cpp +++ /dev/null @@ -1,106 +0,0 @@ -/** - * @file memory_commands.cpp - * @brief Commands to access (read/write) some memory (for example - channel registers, descriptors, physical, etc.) - */ -#include "memory_commands.hpp" - -#include -#include - -Field::Field(std::string &&name, std::string &&description) : - m_name(std::move(name)), - m_description(std::move(description)) -{} - - -const std::string &Field::name() const -{ - return m_name; -} - -const std::string &Field::description() const -{ - return m_description; -} - -const std::map> &MemorySource::get_fields() const -{ - return m_fields; -} - -void MemorySource::add_field(std::shared_ptr field) -{ - assert(m_fields.find(field->name()) == m_fields.end()); - m_fields[field->name()] = field; -} - -constexpr size_t PrintCommand::PRINT_ALL; - -PrintCommand::PrintCommand(std::shared_ptr memory) : - ShellCommand("print", "p", get_help(memory->get_fields())), - m_memory(memory) -{} - -ShellResult PrintCommand::execute(const std::vector &args) -{ - if (args.size() != 1) { - return ShellResult("Invalid params\n"); - } - - std::string field_name{}; - size_t index{}; - std::tie(field_name, index) = parse_field(args[0]); - - const auto &fields = m_memory->get_fields(); - auto field_it = fields.find(field_name); - if (fields.end() == field_it) { - throw std::runtime_error(fmt::format("Field {} does not exist", field_name)); - } - const auto &field = field_it->second; - - if (index == PRINT_ALL) { - std::vector results; - results.reserve(field->elements_count()); - for (size_t i = 0; i < field->elements_count(); i++) { - results.emplace_back(ShellResult(field->print_element(*m_memory, i))); - } - return ShellResult(results); - } - else { - if (index >= field->elements_count()) { - throw std::runtime_error(fmt::format("Index {} is out of range (max {})", index, field->elements_count())); - } - return ShellResult(field->print_element(*m_memory, index)); - } -} - -std::pair PrintCommand::parse_field(const std::string &field_arg) -{ - static const std::regex field_name_pattern("([a-zA-Z]+)"); - static const std::regex array_access_pattern("([a-zA-Z]+)\\[([0-9]+)\\]"); - std::smatch match; - - if (std::regex_match(field_arg, match, field_name_pattern)) { - assert(match.size() == 2); - const auto field = match[1]; - return std::make_pair(field, PRINT_ALL); - } - else if (std::regex_match(field_arg, match, array_access_pattern)) { - assert(match.size() == 3); - const auto &field = match[1]; - const auto index = std::atoi(match[2].str().c_str()); - return 
std::make_pair(field, index); - } - else { - throw std::runtime_error(fmt::format("Invalid syntax {}", field_arg)); - } -} - -std::string PrintCommand::get_help(const std::map> &fields) -{ - std::string help = "Pretty print some field, usage: print []. Fields:\n"; - for (auto field : fields) { - help += fmt::format("\t{} - {}\n", field.second->name(), field.second->description()); - } - return help; -} diff --git a/hailort/tools/hw_debug/memory_commands.hpp b/hailort/tools/hw_debug/memory_commands.hpp deleted file mode 100644 index 95dc913f..00000000 --- a/hailort/tools/hw_debug/memory_commands.hpp +++ /dev/null @@ -1,211 +0,0 @@ -/** - * @file memory_commands.hpp - * @brief Commands to access (read/write) some memory (for example - channel registers, descriptors, physical, etc.) - */ - -#ifndef _HW_DEBUG_MEMORY_COMMANDS_H_ -#define _HW_DEBUG_MEMORY_COMMANDS_H_ - -#include "shell.hpp" -#include "hailo/hailort.h" -#include "hailo/expected.hpp" -#include "spdlog/fmt/fmt.h" - -#include -#include -#include -#include - -class MemorySource; - -class Field { -public: - explicit Field(std::string &&name, std::string &&description); - virtual ~Field() = default; - - Field(const Field &other) = delete; - Field &operator=(const Field &other) = delete; - - const std::string &name() const; - const std::string &description() const; - - virtual size_t elements_count() const = 0; - virtual std::string print_element(MemorySource& memory, size_t index) const = 0; -private: - const std::string m_name; - const std::string m_description; -}; - -class MemorySource { -public: - virtual ~MemorySource() = default; - - virtual hailo_status read(uint64_t offset, uint8_t *data, size_t size) = 0; - virtual hailo_status write(uint64_t offset, const uint8_t *data, size_t size) = 0; - virtual size_t total_size() const = 0; - - template - T read(uint64_t offset) - { - static_assert(std::is_trivial::value, "Non trivial type"); - T value{}; - auto status = read(offset, reinterpret_cast(&value), sizeof(value)); - if (HAILO_SUCCESS != status) { - throw std::runtime_error(fmt::format("Failed read at {} (size {})", offset, sizeof(value))); - } - return value; - } - - const std::map> &get_fields() const; -protected: - void add_field(std::shared_ptr field); - -private: - std::map> m_fields; -}; - -template -class MemoryWriteCommand : public ShellCommand { -public: - static_assert(std::is_integral::value, "MemoryWriteCommand works only with integers"); - - MemoryWriteCommand(std::shared_ptr memory) : - ShellCommand(get_name(), get_short_name(), get_help()), - m_memory(memory) - {} - - ShellResult execute(const std::vector &args) { - if (args.size() != 2) { - return ShellResult("Invalid params\n"); - } - - uint64_t offset; - if (sscanf(args[0].c_str(), "%" SCNx64, &offset) != 1) { - return ShellResult(fmt::format("Invalid offset {}\n")); - } - - uint32_t data; - if (sscanf(args[1].c_str(), "%" SCNx32, &data) != 1) { - return ShellResult(fmt::format("Invalid data {}\n", args[1])); - } - - if ((offset % sizeof(IntType)) != 0) { - return ShellResult(fmt::format("Offset {:x} must be a multiple of {}\n", offset, sizeof(IntType))); - } - - if (offset + sizeof(IntType) > m_memory->total_size()) { - return ShellResult(fmt::format("Offset {:x} too large (max {:x})\n", offset, m_memory->total_size())); - } - - if (data > std::numeric_limits::max()) { - return ShellResult(fmt::format("data {:x} too large\n", data)); - } - IntType data_as_int = static_cast(data); - auto status = m_memory->write(offset, reinterpret_cast(&data_as_int), 
-        if (HAILO_SUCCESS != status) {
-            return ShellResult(fmt::format("Failed write memory {}\n", status));
-        }
-
-        return ShellResult("");
-    }
-
-private:
-    std::shared_ptr<MemorySource> m_memory;
-
-    static size_t get_bits() { return sizeof(IntType) * 8; }
-    static std::string get_name() { return fmt::format("write{}", get_bits()); }
-    static std::string get_short_name() { return fmt::format("w{}", get_bits()); }
-    static std::string get_help()
-    {
-        return fmt::format("Writes memory in {} granularity. Usage: {} <offset> <data>. Offset and data are hex integers.", get_bits(),
-            get_name());
-    }
-};
-
-template <typename IntType>
-class MemoryReadCommand : public ShellCommand {
-public:
-    static_assert(std::is_integral<IntType>::value, "MemoryReadCommand works only with integers");
-
-    MemoryReadCommand(std::shared_ptr<MemorySource> memory) :
-        ShellCommand(get_name(), get_short_name(), get_help()),
-        m_memory(memory)
-    {}
-
-    ShellResult execute(const std::vector<std::string> &args) {
-        if (args.size() != 2) {
-            return ShellResult("Invalid params\n");
-        }
-
-        uint64_t offset;
-        if (sscanf(args[0].c_str(), "%" SCNx64, &offset) != 1) {
-            return ShellResult(fmt::format("Invalid offset {}\n", args[0]));
-        }
-
-        uint32_t size;
-        if (sscanf(args[1].c_str(), "%" SCNx32, &size) != 1) {
-            return ShellResult(fmt::format("Invalid size {}\n", args[1]));
-        }
-
-        if ((offset % sizeof(IntType)) != 0) {
-            return ShellResult(fmt::format("Offset {:x} must be a multiple of {}\n", offset, sizeof(IntType)));
-        }
-
-        if ((size % sizeof(IntType)) != 0) {
-            return ShellResult(fmt::format("Size {:x} must be a multiple of {}\n", size, sizeof(IntType)));
-        }
-
-        if (offset + size > m_memory->total_size()) {
-            return ShellResult(fmt::format("Offset {:x} and size {:x} too large (max {:x})\n", offset, size,
-                m_memory->total_size()));
-        }
-
-        std::vector<uint8_t> data(size, 0);
-        auto status = m_memory->read(offset, data.data(), data.size());
-        if (HAILO_SUCCESS != status) {
-            return ShellResult(fmt::format("Failed read memory {}\n", status));
-        }
-
-        std::stringstream result;
-        result << std::hex << std::setfill('0');
-        for (size_t i = 0; i < size; i += sizeof(IntType)) {
-            if ((i % 16) == 0) {
-                // Print address
-                result << std::endl << std::setw(8) << (offset + i) << "\t";
-            }
-            IntType *ptr = reinterpret_cast<IntType *>(data.data() + i);
-            result << " " << std::setw(sizeof(IntType) * 2) << static_cast<uint64_t>(*ptr);
-        }
-        result << std::endl;
-        return result.str();
-    }
-
-private:
-    std::shared_ptr<MemorySource> m_memory;
-
-    static size_t get_bits() { return sizeof(IntType) * 8; }
-    static std::string get_name() { return fmt::format("read{}", get_bits()); }
-    static std::string get_short_name() { return fmt::format("r{}", get_bits()); }
-    static std::string get_help()
-    {
-        return fmt::format("Reads memory in {} granularity. Usage: {} <offset> <size>. Offset and size are hex integers.",
-            get_bits(), get_name());
-    }
-};
-
-class PrintCommand : public ShellCommand {
-public:
-    PrintCommand(std::shared_ptr<MemorySource> memory);
-    virtual ShellResult execute(const std::vector<std::string> &args) override;
-
-private:
-    // Returns a pair of the field name and the index
-    static std::pair<std::string, size_t> parse_field(const std::string &field_arg);
-    static std::string get_help(const std::map<std::string, std::shared_ptr<Field>> &fields);
-
-    std::shared_ptr<MemorySource> m_memory;
-
-    static constexpr size_t PRINT_ALL = std::numeric_limits<size_t>::max();
-};
-
-#endif /* _HW_DEBUG_MEMORY_COMMANDS_H_ */
diff --git a/hailort/tools/hw_debug/readline_wrapper.cpp b/hailort/tools/hw_debug/readline_wrapper.cpp
deleted file mode 100644
index 03ab84ce..00000000
--- a/hailort/tools/hw_debug/readline_wrapper.cpp
+++ /dev/null
@@ -1,110 +0,0 @@
-/**
- * @file readline_wrapper.cpp
- * @brief Wrapper for the readline library: either uses the library or falls back to a simple implementation.
- */
-
-#include "readline_wrapper.hpp"
-#include <iostream>
-
-
-#ifdef USE_READLINE
-#include <readline/readline.h>
-#include <readline/history.h>
-#include <signal.h>
-
-static void int_handler(int)
-{
-    printf("\n");           // Move to a new line
-    rl_on_new_line();       // Regenerate the prompt on a newline
-    rl_replace_line("", 0); // Clear the previous text
-    rl_redisplay();
-}
-
-static ReadLineWrapper::AutoCompleter g_auto_completer = nullptr;
-
-static char *name_generator(const char *text, int index)
-{
-    if (!g_auto_completer) {
-        return nullptr;
-    }
-
-    auto results = g_auto_completer(std::string(text));
-    if (static_cast<size_t>(index) >= results.size()) {
-        return nullptr;
-    }
-
-    return strdup(results[index].c_str());
-}
-
-static char **name_completion(const char *text, int start, int)
-{
-    if (start > 0) {
-        // We use autocomplete only for the first arg (command name).
-        return nullptr;
-    }
-
-    rl_attempted_completion_over = 1;
-    return rl_completion_matches(text, name_generator);
-}
-
-void ReadLineWrapper::init_library()
-{
-    rl_attempted_completion_function = name_completion;
-    signal(SIGINT, int_handler);
-}
-
-std::string ReadLineWrapper::readline(const std::string &prompt)
-{
-    auto line_raw = ::readline(prompt.c_str());
-    if (line_raw == nullptr) {
-        // Handle Ctrl+D (EOF)
-        printf("\n");
-        return "";
-    }
-
-    const std::string line(line_raw);
-    free(line_raw);
-    return line;
-}
-
-void ReadLineWrapper::add_history(const std::string &line)
-{
-    ::add_history(line.c_str());
-}
-
-void ReadLineWrapper::set_auto_completer(AutoCompleter completer)
-{
-    g_auto_completer = completer;
-}
-
-void ReadLineWrapper::remove_auto_completer()
-{
-    g_auto_completer = nullptr;
-}
-
-#else
-
-void ReadLineWrapper::init_library()
-{}
-
-// Non readline implementation
-std::string ReadLineWrapper::readline(const std::string &prompt)
-{
-    std::cout << prompt;
-    std::string command;
-    std::getline(std::cin, command);
-    return command;
-}
-
-void ReadLineWrapper::add_history(const std::string &)
-{
-    // No history, just NOP.
-}
-
-void ReadLineWrapper::set_auto_completer(AutoCompleter)
-{}
-
-void ReadLineWrapper::remove_auto_completer()
-{}
-
-#endif
\ No newline at end of file
diff --git a/hailort/tools/hw_debug/readline_wrapper.hpp b/hailort/tools/hw_debug/readline_wrapper.hpp
deleted file mode 100644
index eae25c82..00000000
--- a/hailort/tools/hw_debug/readline_wrapper.hpp
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * @file readline_wrapper.hpp
- * @brief Wrapper for the readline library: either uses the library or falls back to a simple implementation.
- */
-
-#ifndef _HW_DEBUG_READLINE_WRAPPER_H_
-#define _HW_DEBUG_READLINE_WRAPPER_H_
-
-#include <functional>
-#include <string>
-#include <vector>
-
-class ReadLineWrapper final {
-public:
-    ReadLineWrapper() = delete;
-
-    static void init_library();
-    static std::string readline(const std::string &prompt);
-    static void add_history(const std::string &line);
-
-    using AutoCompleter = std::function<std::vector<std::string>(const std::string &text)>;
-    static void set_auto_completer(AutoCompleter completer);
-    static void remove_auto_completer();
-};
-
-#endif /* _HW_DEBUG_READLINE_WRAPPER_H_ */
\ No newline at end of file
diff --git a/hailort/tools/hw_debug/shell.cpp b/hailort/tools/hw_debug/shell.cpp
deleted file mode 100644
index ee5ca38b..00000000
--- a/hailort/tools/hw_debug/shell.cpp
+++ /dev/null
@@ -1,197 +0,0 @@
-/**
- * @file shell.cpp
- * @brief Generic shell - contains commands and sub-shells. The shell implements
- *        a parse-execute command loop.
- */
-#include "shell.hpp"
-#include "readline_wrapper.hpp"
-#include "spdlog/fmt/fmt.h"
-
-#include <algorithm>
-#include <cassert>
-#include <iostream>
-#include <tuple>
-
-static std::vector<std::string> split_string(std::string s, const std::string &delimiter = " ")
-{
-    std::vector<std::string> parts;
-    auto pos = std::string::npos;
-    while ((pos = s.find(delimiter)) != std::string::npos) {
-        parts.push_back(s.substr(0, pos));
-        s.erase(0, pos + delimiter.size());
-    }
-    parts.push_back(s);
-    return parts;
-}
-
-ShellCommand::ShellCommand(const std::string &name, const std::string &short_name, const std::string &help) :
-    m_name(name),
-    m_short_name(short_name),
-    m_help(help)
-{}
-
-Shell::Shell(const std::string &prompt) :
-    m_prompt(prompt),
-    m_commands(),
-    m_should_quit(false)
-{
-    add_command(std::make_unique<Help>(*this));
-    add_command(std::make_unique<Quit>(*this));
-}
-
-void Shell::add_command(std::unique_ptr<ShellCommand> shell_command)
-{
-    assert(nullptr == get_command_by_name(shell_command->name()));
-    assert(nullptr == get_command_by_name(shell_command->short_name()));
-
-    m_commands.emplace_back(std::move(shell_command));
-}
-
-std::shared_ptr<Shell> Shell::add_subshell(const std::string &name, const std::string &short_name)
-{
-    auto subshell_cmd = std::make_unique<StartSubshellCommand>(name, short_name,
-        fmt::format("Start {} subshell", name));
-    auto shell = subshell_cmd->get_shell();
-    add_command(std::move(subshell_cmd));
-    return shell;
-}
-
-void Shell::run_forever()
-{
-    ReadLineWrapper::set_auto_completer([this](const std::string &text) {
-        return autocomplete(text);
-    });
-
-    std::cout << get_help() << std::endl;
-    while (!m_should_quit) {
-        std::string name;
-        std::vector<std::string> args;
-        std::tie(name, args) = ask_user_command();
-
-        auto cmd = get_command_by_name(name);
-        if (cmd == nullptr) {
-            std::cout << fmt::format("Command {} not found...", name) << std::endl;
-            continue;
-        }
-
-        try {
-            auto cmd_result = cmd->execute(args);
-            cmd_result.print(std::cout);
-        } catch (const std::runtime_error &exc) {
-            std::cerr << fmt::format("Error: {}", exc.what()) << std::endl;
-        }
-    }
-
-    ReadLineWrapper::remove_auto_completer();
-
-    // Disable quit for next run
-    m_should_quit = false;
-}
-
-std::vector<std::string> Shell::autocomplete(const std::string &text)
-{
-    std::vector<std::string> names;
-    for (const auto &cmd : m_commands) {
-        if (text.empty() || (cmd->name().rfind(text, 0) == 0)) {
-            names.emplace_back(cmd->name());
-        }
-
-        if (text.empty() || (cmd->short_name().rfind(text, 0) == 0)) {
-            names.emplace_back(cmd->short_name());
-        }
-    }
-
-    return names;
-}
-
-std::pair<std::string, std::vector<std::string>> Shell::ask_user_command()
-{
-    while (true) {
-        auto line = ReadLineWrapper::readline(m_prompt);
-        auto parts = split_and_trim_line(line);
-        if (parts.empty()) {
-            continue;
-        }
-
-        ReadLineWrapper::add_history(line);
-        const auto name = parts[0];
-        const std::vector<std::string> args(parts.begin() + 1, parts.end());
-        return std::make_pair(name, args);
-    }
-}
-
-std::vector<std::string> Shell::split_and_trim_line(const std::string &line)
-{
-    auto parts = split_string(line, " ");
-
-    // remove spaces
-    for (auto &part : parts) {
-        part.erase(std::remove_if(part.begin(), part.end(), [](char c) {
-            return std::isspace(c);
-        }), part.end());
-    }
-
-    // Remove empty commands
-    parts.erase(std::remove_if(parts.begin(), parts.end(), [](const std::string &s) {
-        return s.empty();
-    }), parts.end());
-
-    return parts;
-}
-
-std::string Shell::get_help() const
-{
-    std::string result;
-    for (const auto &cmd : m_commands) {
-        auto full_name = fmt::format("{}({})", cmd->name(), cmd->short_name());
-        result += fmt::format("{:<30}{}\n", full_name, cmd->help());
-    }
-    return result;
-}
-
-ShellCommand *Shell::get_command_by_name(const std::string &name)
-{
-    for (const auto &cmd : m_commands) {
-        if ((name == cmd->name()) || (name == cmd->short_name())) {
-            return cmd.get();
-        }
-    }
-    return nullptr;
-}
-
-Shell::Help::Help(Shell &shell) :
-    ShellCommand("help", "h", "Show help on all commands"),
-    m_shell(shell)
-{}
-
-ShellResult Shell::Help::execute(const std::vector<std::string> &)
-{
-    return m_shell.get_help();
-}
-
-Shell::Quit::Quit(Shell &shell) :
-    ShellCommand("quit", "q", "Quit current shell"),
-    m_shell(shell)
-{}
-
-ShellResult Shell::Quit::execute(const std::vector<std::string> &)
-{
-    m_shell.m_should_quit = true;
-    return ShellResult("");
-}
-
-
-StartSubshellCommand::StartSubshellCommand(const std::string &name, const std::string &short_name,
-    const std::string &help) :
-    ShellCommand(name, short_name, help),
-    m_shell(std::make_shared<Shell>(fmt::format("({})> ", name)))
-{}
-
-ShellResult StartSubshellCommand::execute(const std::vector<std::string> &)
-{
-    m_shell->run_forever();
-    return ShellResult("");
-}
-
-std::shared_ptr<Shell> StartSubshellCommand::get_shell()
-{
-    return m_shell;
-}
diff --git a/hailort/tools/hw_debug/shell.hpp b/hailort/tools/hw_debug/shell.hpp
deleted file mode 100644
index a06e42a9..00000000
--- a/hailort/tools/hw_debug/shell.hpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * @file shell.hpp
- * @brief Generic shell - contains commands and sub-shells. The shell implements
- *        a parse-execute command loop.
- */
-
-#ifndef _HW_DEBUG_SHELL_H_
-#define _HW_DEBUG_SHELL_H_
-
-#include <memory>
-#include <sstream>
-#include <string>
-#include <utility>
-#include <vector>
-
-// Result returned from each command. Currently wrapper to the output string.
-class ShellResult final {
-public:
-    ShellResult(const std::string &str) :
-        m_str(str)
-    {}
-
-    ShellResult(const std::vector<ShellResult> &results)
-    {
-        std::stringstream out;
-        for (const auto &result : results) {
-            result.print(out);
-        }
-        m_str = out.str();
-    }
-
-    void print(std::ostream &out) const
-    {
-        out << m_str;
-    }
-
-private:
-    std::string m_str;
-};
-
-// Base abstract class for some shell command.
-class ShellCommand {
-public:
-    virtual ~ShellCommand() = default;
-
-    ShellCommand(const std::string &name, const std::string &short_name,
-        const std::string &help);
-
-    std::string name() const { return m_name; }
-    std::string short_name() const { return m_short_name; }
-    std::string help() const { return m_help; }
-
-    virtual ShellResult execute(const std::vector<std::string> &args) = 0;
-private:
-    const std::string m_name;
-    const std::string m_short_name;
-    const std::string m_help;
-};
-
-class Shell final {
-public:
-    explicit Shell(const std::string &prompt);
-
-    Shell(const Shell &other) = delete;
-    Shell &operator=(const Shell &other) = delete;
-
-    void add_command(std::unique_ptr<ShellCommand> shell_command);
-    std::shared_ptr<Shell> add_subshell(const std::string &name, const std::string &short_name);
-    void run_forever();
-    std::vector<std::string> autocomplete(const std::string &text);
-
-private:
-
-    class Help : public ShellCommand {
-    public:
-        Help(Shell &shell);
-        ShellResult execute(const std::vector<std::string> &args) override;
-    private:
-        Shell &m_shell;
-    };
-
-    class Quit : public ShellCommand {
-    public:
-        Quit(Shell &shell);
-        ShellResult execute(const std::vector<std::string> &args) override;
-    private:
-        Shell &m_shell;
-    };
-
-    // pair of command name and its arguments.
-    std::pair<std::string, std::vector<std::string>> ask_user_command();
-    static std::vector<std::string> split_and_trim_line(const std::string &line);
-
-    std::string get_help() const;
-    // Gets a command or nullptr if it doesn't exist.
-    ShellCommand *get_command_by_name(const std::string &name);
-
-    const std::string m_prompt;
-    std::vector<std::unique_ptr<ShellCommand>> m_commands;
-    bool m_should_quit;
-};
-
-
-// This command starts a new subshell
-class StartSubshellCommand : public ShellCommand {
-public:
-    StartSubshellCommand(const std::string &name, const std::string &short_name,
-        const std::string &help);
-    ShellResult execute(const std::vector<std::string> &) override;
-
-    std::shared_ptr<Shell> get_shell();
-private:
-    std::shared_ptr<Shell> m_shell;
-};
-
-#endif /* _HW_DEBUG_SHELL_H_ */
\ No newline at end of file
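Reviewer note: for reference while reviewing the removal, this is a minimal sketch of how the deleted pieces composed - a MemorySource subclass exposes a memory region, the templated read/write commands and PrintCommand are registered on a Shell, and run_forever() drives the parse-execute loop (in the deleted main.cpp this role was played by create_pcie_accelerator_shell()/create_vpu_shell()). The DummyMemory class, its 4 KB backing buffer, and the prompt string are illustrative assumptions, not code from this patch.

    #include "shell.hpp"
    #include "memory_commands.hpp"

    #include <cstring>
    #include <vector>

    // Hypothetical MemorySource backed by a plain buffer; the real tool exposed
    // channel registers, descriptors, physical memory, etc.
    class DummyMemory : public MemorySource {
    public:
        DummyMemory() : m_data(0x1000, 0) {}

        hailo_status read(uint64_t offset, uint8_t *data, size_t size) override
        {
            // Bounds are already validated by the commands against total_size().
            std::memcpy(data, m_data.data() + offset, size);
            return HAILO_SUCCESS;
        }

        hailo_status write(uint64_t offset, const uint8_t *data, size_t size) override
        {
            std::memcpy(m_data.data() + offset, data, size);
            return HAILO_SUCCESS;
        }

        size_t total_size() const override { return m_data.size(); }

    private:
        std::vector<uint8_t> m_data;
    };

    int main()
    {
        ReadLineWrapper::init_library();

        auto memory = std::make_shared<DummyMemory>();
        Shell shell("(dummy)> ");
        shell.add_command(std::make_unique<MemoryReadCommand<uint32_t>>(memory));   // "read32" / "r32"
        shell.add_command(std::make_unique<MemoryWriteCommand<uint32_t>>(memory));  // "write32" / "w32"
        shell.add_command(std::make_unique<PrintCommand>(memory));                  // "print" / "p"
        shell.run_forever();  // "help"/"quit" are installed by the Shell itself
        return 0;
    }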