From d6a03b33d294058419b13da61378d8e751b4ee4b Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Thu, 21 Dec 2023 20:12:34 +0000 Subject: [PATCH 1/4] refactor(cri): fast-track add containers to cache (synchronous lookup case) Signed-off-by: Melissa Kilby --- userspace/libsinsp/container_engine/cri.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/userspace/libsinsp/container_engine/cri.cpp b/userspace/libsinsp/container_engine/cri.cpp index 666f8b9e14..73174a772f 100644 --- a/userspace/libsinsp/container_engine/cri.cpp +++ b/userspace/libsinsp/container_engine/cri.cpp @@ -186,6 +186,7 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) container_id.c_str(), container.m_mesos_task_id.c_str()); } + // note: query_os_for_missing_info is set to 'true' by default if (query_os_for_missing_info) { libsinsp_logger()->format(sinsp_logger::SEV_DEBUG, @@ -243,7 +244,27 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) libsinsp_logger()->format(sinsp_logger::SEV_DEBUG, "cri_async (%s): Starting synchronous lookup", container_id.c_str()); + // lookup_sync function directly invokes the container engine specific parser `parse` done = m_async_source->lookup_sync(key, result); + // explicitly check for the most crucial retrieved value to be present + if(!result.m_image.empty()) + { + /* + * Only for synchronous lookup option (e.g. Falco's default is async not sync) + * + * Fast-track addition of enriched containers (fields successfully retrieved from the container runtime socket) + * to the container cache, bypassing the round-trip process: + * `source_callback` -> `notify_new_container` -> + * `container_to_sinsp_event(container_to_json(container_info), ...)` -> + * `parse_container_json_evt` -> `m_inspector->m_container_manager.add_container()` + * + * Although we still re-add the container in `parse_container_json_evt` to also support native 'container' events, it + * introduces an avoidable delay in the incoming syscall event stream. Syscall events do not explicitly require container + * events and instead directly retrieve container details from the container cache. This behavior could potentially + * contribute to the issues noted by adopters, such as the absence of container images in syscall events. + */ + cache->replace_container(std::make_shared(result)); + } } if (done) From 516ae9730899954b9d99a284e8184d3dd09810c2 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Sun, 11 Feb 2024 02:40:05 +0000 Subject: [PATCH 2/4] cleanup(cri): cleanup lookup_status handling * Now more precise, logical and clean, set STARTED at the beginning and SUCCESSFUL at the end of `parse_container_json_evt` if and only if the container image was successfully retrieved and present before calling `add_container` or if it's a pod sanbox container. * In `lookup_sync` we previously set the lookup_status to SUCCESSFUL, which can be avoided to have a more consolidated and clean behavior in `parse_container_json_evt`. Signed-off-by: Melissa Kilby --- .../container_async_source.tpp | 1 - userspace/libsinsp/container_engine/cri.cpp | 24 +++++++------ userspace/libsinsp/parsers.cpp | 35 ++++++++++++------- 3 files changed, 37 insertions(+), 23 deletions(-) diff --git a/userspace/libsinsp/container_engine/container_async_source.tpp b/userspace/libsinsp/container_engine/container_async_source.tpp index c02c392da1..8b3f48ebf6 100644 --- a/userspace/libsinsp/container_engine/container_async_source.tpp +++ b/userspace/libsinsp/container_engine/container_async_source.tpp @@ -58,7 +58,6 @@ bool container_async_source::lookup(const key_type& key, template bool container_async_source::lookup_sync(const key_type& key, sinsp_container_info& value) { - value.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); value.m_type = container_type(key); value.m_id = container_id(key); diff --git a/userspace/libsinsp/container_engine/cri.cpp b/userspace/libsinsp/container_engine/cri.cpp index 73174a772f..1ecc283afc 100644 --- a/userspace/libsinsp/container_engine/cri.cpp +++ b/userspace/libsinsp/container_engine/cri.cpp @@ -193,7 +193,6 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) "cri (%s): Performing lookup", container_id.c_str()); - container.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); libsinsp::cgroup_limits::cgroup_limits_key key( container.m_id, tinfo->get_cgroup("cpu"), @@ -244,24 +243,29 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) libsinsp_logger()->format(sinsp_logger::SEV_DEBUG, "cri_async (%s): Starting synchronous lookup", container_id.c_str()); - // lookup_sync function directly invokes the container engine specific parser `parse` + // `lookup_sync` function directly invokes the container engine specific parser `parse` done = m_async_source->lookup_sync(key, result); - // explicitly check for the most crucial retrieved value to be present - if(!result.m_image.empty()) + if(!result.m_image.empty() || result.is_pod_sandbox()) { /* * Only for synchronous lookup option (e.g. Falco's default is async not sync) * - * Fast-track addition of enriched containers (fields successfully retrieved from the container runtime socket) - * to the container cache, bypassing the round-trip process: + * Explicitly check for the most crucial retrieved value (`m_image`) to be present before enabling the + * fast-track container add option. At this point, the container with only the cgroup (container id) was + * already added to the cache. Therefore, we can proceed to call `replace_container`. + * + * Bypassing the round-trip process: * `source_callback` -> `notify_new_container` -> * `container_to_sinsp_event(container_to_json(container_info), ...)` -> * `parse_container_json_evt` -> `m_inspector->m_container_manager.add_container()` * - * Although we still re-add the container in `parse_container_json_evt` to also support native 'container' events, it - * introduces an avoidable delay in the incoming syscall event stream. Syscall events do not explicitly require container - * events and instead directly retrieve container details from the container cache. This behavior could potentially - * contribute to the issues noted by adopters, such as the absence of container images in syscall events. + * In `parse_container_json_evt`, we still re-add the container to support native 'container' events + * and new container callbacks that may expect the container as JSON in the artificial sinsp evt. + * However, we can avoid delays by storing the container struct in the container cache now. + * This is beneficial because syscall events do not explicitly require container events, instead, + * they directly retrieve container details from the container cache. This new feature can mitigate + * issues noted by adopters, such as the absence of container images in syscall events even when + * disabling async lookups. */ cache->replace_container(std::make_shared(result)); } diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp index d2d423b367..4170a7eda4 100644 --- a/userspace/libsinsp/parsers.cpp +++ b/userspace/libsinsp/parsers.cpp @@ -5078,22 +5078,11 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) const Json::Value& lookup_state = container["lookup_state"]; if(check_json_val_is_convertible(lookup_state, Json::uintValue, "lookup_state")) { - container_info->set_lookup_status(static_cast(lookup_state.asUInt())); - switch(container_info->get_lookup_status()) - { - case sinsp_container_lookup::state::STARTED: - case sinsp_container_lookup::state::SUCCESSFUL: - case sinsp_container_lookup::state::FAILED: - break; - default: - container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); - } - // state == STARTED doesn't make sense in a scap file // as there's no actual lookup that would ever finish if(!evt->get_tinfo_ref() && container_info->get_lookup_status() == sinsp_container_lookup::state::STARTED) { - SINSP_DEBUG("Rewriting lookup_state = STARTED from scap file to FAILED for container %s", + SINSP_DEBUG("Rewriting lookup_state == STARTED from scap file to FAILED for container %s", container_info->m_id.c_str()); container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); } @@ -5239,6 +5228,28 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) } evt->set_tinfo_ref(container_info->get_tinfo(m_inspector)); evt->set_tinfo(evt->get_tinfo_ref().get()); + container_info->set_lookup_status(static_cast(lookup_state.asUInt())); + switch(container_info->get_lookup_status()) + { + // Preserve case where lookup_state == STARTED from scap file was set to FAILED + // or otherwise set to false. + case sinsp_container_lookup::state::FAILED: + break; + default: + { + if (!container_info->m_image.empty() || container_info->is_pod_sandbox()) + { + // Marked as SUCCESSFUL if and only if the container image field present or + // if it's a pod sandbox container + container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); + } else + { + container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); + } + } + } + // `add_container` also triggers the callbacks, therefore always continue even if the + // lookup_status was set to FAILED m_inspector->m_container_manager.add_container(container_info, evt->get_thread_info(true)); /* SINSP_STR_DEBUG("Container\n-------\nID:" + container_info.m_id + From 6c0da857ed978dd72cc6c00b215d446cc3f8a7d6 Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Sat, 24 Feb 2024 21:45:16 +0000 Subject: [PATCH 3/4] fix(cri): properly handle state assignments in new fast-track CRI container sync lookups Co-authored-by: Roberto Scolaro Signed-off-by: Melissa Kilby --- userspace/libsinsp/container_engine/cri.cpp | 4 ++++ userspace/libsinsp/parsers.cpp | 16 +++++++++++----- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/userspace/libsinsp/container_engine/cri.cpp b/userspace/libsinsp/container_engine/cri.cpp index 1ecc283afc..0a80afb6e7 100644 --- a/userspace/libsinsp/container_engine/cri.cpp +++ b/userspace/libsinsp/container_engine/cri.cpp @@ -245,6 +245,9 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) container_id.c_str()); // `lookup_sync` function directly invokes the container engine specific parser `parse` done = m_async_source->lookup_sync(key, result); + // note: The container image is the most crucial field from a security incident response perspective. + // We aim to raise the bar for successful container lookups. Conversely, pod sandboxes do not include + // a container image in the API response. if(!result.m_image.empty() || result.is_pod_sandbox()) { /* @@ -267,6 +270,7 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) * issues noted by adopters, such as the absence of container images in syscall events even when * disabling async lookups. */ + result.set_lookup_status(sinsp_container_lookup::state::STARTED); cache->replace_container(std::make_shared(result)); } } diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp index 4170a7eda4..bb57f19475 100644 --- a/userspace/libsinsp/parsers.cpp +++ b/userspace/libsinsp/parsers.cpp @@ -5221,11 +5221,6 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) } } - if(!container_info->is_successful()) - { - SINSP_DEBUG("Filtering container event for failed lookup of %s (but calling callbacks anyway)", container_info->m_id.c_str()); - evt->set_filtered_out(true); - } evt->set_tinfo_ref(container_info->get_tinfo(m_inspector)); evt->set_tinfo(evt->get_tinfo_ref().get()); container_info->set_lookup_status(static_cast(lookup_state.asUInt())); @@ -5237,6 +5232,9 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) break; default: { + // note: The container image is the most crucial field from a security incident response perspective. + // We aim to raise the bar for successful container lookups. Conversely, pod sandboxes do not include + // a container image in the API response. if (!container_info->m_image.empty() || container_info->is_pod_sandbox()) { // Marked as SUCCESSFUL if and only if the container image field present or @@ -5248,6 +5246,14 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) } } } + + // note: Important, only set event filtering flag after all final state updates are done + if(!container_info->is_successful()) + { + SINSP_DEBUG("Filtering container event for failed lookup of %s (but calling callbacks anyway)", container_info->m_id.c_str()); + evt->set_filtered_out(true); + } + // `add_container` also triggers the callbacks, therefore always continue even if the // lookup_status was set to FAILED m_inspector->m_container_manager.add_container(container_info, evt->get_thread_info(true)); From 9646c877a3d2525255cff7ceefafb4245dae30fd Mon Sep 17 00:00:00 2001 From: Melissa Kilby Date: Mon, 26 Feb 2024 19:07:38 +0000 Subject: [PATCH 4/4] cleanup: revert any changes to parse_container_json_evt + ensure existing state updates in lookup_sync are preserved Signed-off-by: Melissa Kilby --- .../container_async_source.tpp | 1 + userspace/libsinsp/container_engine/cri.cpp | 4 ++ userspace/libsinsp/parsers.cpp | 45 ++++++------------- 3 files changed, 19 insertions(+), 31 deletions(-) diff --git a/userspace/libsinsp/container_engine/container_async_source.tpp b/userspace/libsinsp/container_engine/container_async_source.tpp index 8b3f48ebf6..c02c392da1 100644 --- a/userspace/libsinsp/container_engine/container_async_source.tpp +++ b/userspace/libsinsp/container_engine/container_async_source.tpp @@ -58,6 +58,7 @@ bool container_async_source::lookup(const key_type& key, template bool container_async_source::lookup_sync(const key_type& key, sinsp_container_info& value) { + value.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); value.m_type = container_type(key); value.m_id = container_id(key); diff --git a/userspace/libsinsp/container_engine/cri.cpp b/userspace/libsinsp/container_engine/cri.cpp index 0a80afb6e7..3b8d1a3335 100644 --- a/userspace/libsinsp/container_engine/cri.cpp +++ b/userspace/libsinsp/container_engine/cri.cpp @@ -271,7 +271,11 @@ bool cri::resolve(sinsp_threadinfo *tinfo, bool query_os_for_missing_info) * disabling async lookups. */ result.set_lookup_status(sinsp_container_lookup::state::STARTED); + // note: The cache should not have SUCCESSFUL as lookup status at this point, else `parse_container_json_evt` would wrongly exit early. cache->replace_container(std::make_shared(result)); + // note: On the other hand `parse_container_json_evt` expects SUCCESSFUL as lookup state for the incoming container event / + // the not yet cached container, exactly how it was done within `lookup_sync`. + result.set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); } } diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp index bb57f19475..d2d423b367 100644 --- a/userspace/libsinsp/parsers.cpp +++ b/userspace/libsinsp/parsers.cpp @@ -5078,11 +5078,22 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) const Json::Value& lookup_state = container["lookup_state"]; if(check_json_val_is_convertible(lookup_state, Json::uintValue, "lookup_state")) { + container_info->set_lookup_status(static_cast(lookup_state.asUInt())); + switch(container_info->get_lookup_status()) + { + case sinsp_container_lookup::state::STARTED: + case sinsp_container_lookup::state::SUCCESSFUL: + case sinsp_container_lookup::state::FAILED: + break; + default: + container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); + } + // state == STARTED doesn't make sense in a scap file // as there's no actual lookup that would ever finish if(!evt->get_tinfo_ref() && container_info->get_lookup_status() == sinsp_container_lookup::state::STARTED) { - SINSP_DEBUG("Rewriting lookup_state == STARTED from scap file to FAILED for container %s", + SINSP_DEBUG("Rewriting lookup_state = STARTED from scap file to FAILED for container %s", container_info->m_id.c_str()); container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); } @@ -5221,41 +5232,13 @@ void sinsp_parser::parse_container_json_evt(sinsp_evt *evt) } } - evt->set_tinfo_ref(container_info->get_tinfo(m_inspector)); - evt->set_tinfo(evt->get_tinfo_ref().get()); - container_info->set_lookup_status(static_cast(lookup_state.asUInt())); - switch(container_info->get_lookup_status()) - { - // Preserve case where lookup_state == STARTED from scap file was set to FAILED - // or otherwise set to false. - case sinsp_container_lookup::state::FAILED: - break; - default: - { - // note: The container image is the most crucial field from a security incident response perspective. - // We aim to raise the bar for successful container lookups. Conversely, pod sandboxes do not include - // a container image in the API response. - if (!container_info->m_image.empty() || container_info->is_pod_sandbox()) - { - // Marked as SUCCESSFUL if and only if the container image field present or - // if it's a pod sandbox container - container_info->set_lookup_status(sinsp_container_lookup::state::SUCCESSFUL); - } else - { - container_info->set_lookup_status(sinsp_container_lookup::state::FAILED); - } - } - } - - // note: Important, only set event filtering flag after all final state updates are done if(!container_info->is_successful()) { SINSP_DEBUG("Filtering container event for failed lookup of %s (but calling callbacks anyway)", container_info->m_id.c_str()); evt->set_filtered_out(true); } - - // `add_container` also triggers the callbacks, therefore always continue even if the - // lookup_status was set to FAILED + evt->set_tinfo_ref(container_info->get_tinfo(m_inspector)); + evt->set_tinfo(evt->get_tinfo_ref().get()); m_inspector->m_container_manager.add_container(container_info, evt->get_thread_info(true)); /* SINSP_STR_DEBUG("Container\n-------\nID:" + container_info.m_id +