diff --git a/plugins/anomalydetection/src/num/cms.h b/plugins/anomalydetection/src/num/cms.h index d89eddca..942175ae 100644 --- a/plugins/anomalydetection/src/num/cms.h +++ b/plugins/anomalydetection/src/num/cms.h @@ -43,10 +43,34 @@ class cms double eps_; // Relative error (e.g. 0.0001) public: + static uint64_t calculate_d_rows_from_gamma(double gamma) + { + // -> determine Rows / number of hash functions + return static_cast(std::ceil(std::log(1.0 / gamma))); + } + + static double calculate_gamma_rows_from_d(uint64_t d) + { + // -> reverse calculate error probability from Rows / number of hash functions + return 1.0 / std::exp(d); + } + + static uint64_t calculate_w_cols_buckets_from_eps(double eps) + { + // -> determine Cols / number of buckets + return static_cast(std::ceil(std::exp(1) / eps)); + } + + static double calculate_eps_cols_buckets_from_w(uint64_t w) + { + // -> reverse calculate relative error from Cols / number of buckets + return std::exp(1) / w; + } + cms(double gamma, double eps) { - d_ = static_cast(std::ceil(std::log(1.0 / gamma))); // -> determine Rows / number of hash functions - w_ = static_cast(std::ceil(std::exp(1) / eps)); // -> determine Cols / number of buckets + d_ = calculate_d_rows_from_gamma(gamma); // -> determine Rows / number of hash functions + w_ = calculate_w_cols_buckets_from_eps(eps); // -> determine Cols / number of buckets gamma_ = gamma; eps_ = eps; sketch = std::make_unique[]>(d_); @@ -62,8 +86,8 @@ class cms { d_ = d; w_ = w; - gamma_ = 1.0 / std::exp(d); // -> reverse calculate error probability from Rows / number of hash functions - eps_ = std::exp(1) / w; // -> reverse calculate relative error from Cols / number of buckets + gamma_ = calculate_gamma_rows_from_d(d); // -> reverse calculate error probability from Rows / number of hash functions + eps_ = calculate_eps_cols_buckets_from_w(w); // -> reverse calculate relative error from Cols / number of buckets sketch = std::make_unique[]>(d_); for (uint64_t i = 0; i < d_; ++i) { @@ -149,6 +173,11 @@ class cms return d_ * w_ * sizeof(T); } + static size_t get_size_bytes(uint64_t d, uint64_t w) + { + return d * w * sizeof(T); + } + std::pair get_dimensions() const { return std::make_pair(d_, w_); diff --git a/plugins/anomalydetection/src/plugin.cpp b/plugins/anomalydetection/src/plugin.cpp index 36442a22..2ff02fbb 100644 --- a/plugins/anomalydetection/src/plugin.cpp +++ b/plugins/anomalydetection/src/plugin.cpp @@ -136,13 +136,25 @@ void anomalydetection::parse_init_config(nlohmann::json& config_json) auto gamma_eps_pointer = nlohmann::json::json_pointer("/count_min_sketch/gamma_eps"); if (config_json.contains(gamma_eps_pointer) && config_json[gamma_eps_pointer].is_array()) { + int i = 0; for (const auto& array : config_json[gamma_eps_pointer]) { if (array.is_array() && array.size() == 2) { std::vector sub_array = {array[0].get(), array[1].get()}; - m_gamma_eps.emplace_back(sub_array); + log_error("Count min sketch data structure number (" + + std::to_string(i+1) + ") loaded with gamma and eps values (" + + std::to_string(sub_array[0]) + "," + + std::to_string(sub_array[1]) + + ") equivalent to sketch dimensions (" + + std::to_string(plugin::anomalydetection::num::cms::calculate_d_rows_from_gamma(sub_array[0])) + "," + + std::to_string(plugin::anomalydetection::num::cms::calculate_w_cols_buckets_from_eps(sub_array[1])) + + ") -> adding (" + + std::to_string(plugin::anomalydetection::num::cms::get_size_bytes(plugin::anomalydetection::num::cms::calculate_d_rows_from_gamma(sub_array[0]),plugin::anomalydetection::num::cms::calculate_w_cols_buckets_from_eps(sub_array[1]))) + + ") bytes of constant memory allocation on the heap"); + m_gamma_eps.emplace_back(sub_array); } + i++; } } @@ -150,13 +162,29 @@ void anomalydetection::parse_init_config(nlohmann::json& config_json) auto rows_cols_pointer = nlohmann::json::json_pointer("/count_min_sketch/rows_cols"); if (config_json.contains(rows_cols_pointer) && config_json[rows_cols_pointer].is_array()) { + int i = 0; + if (config_json.contains(gamma_eps_pointer) && config_json[gamma_eps_pointer].is_array()) + { + log_error("[Override Notice] Count min sketch data structures will be overriden with below settings as 'rows_cols' config overrides any previous setting"); + } for (const auto& array : config_json[rows_cols_pointer]) { if (array.is_array() && array.size() == 2) { std::vector sub_array = {array[0].get(), array[1].get()}; + log_error("Count min sketch data structure number (" + + std::to_string(i+1) + ") loaded with d and w/buckets values (" + + std::to_string(sub_array[0]) + "," + + std::to_string(sub_array[1]) + + ") equivalent to sketch error probability and relative error tolerances (" + + std::to_string(plugin::anomalydetection::num::cms::calculate_gamma_rows_from_d(sub_array[0])) + "," + + std::to_string(plugin::anomalydetection::num::cms::calculate_eps_cols_buckets_from_w(sub_array[1])) + + ") -> adding (" + + std::to_string(plugin::anomalydetection::num::cms::get_size_bytes(sub_array[0],sub_array[1])) + + ") bytes of constant memory allocation on the heap"); m_rows_cols.emplace_back(sub_array); } + i++; } } @@ -175,6 +203,19 @@ void anomalydetection::parse_init_config(nlohmann::json& config_json) PPME_SYSCALL_OPENAT2_X, PPME_SYSCALL_OPEN_BY_HANDLE_AT_X }; + std::vector supported_codes_any_profile = { + PPME_SYSCALL_EXECVEAT_X, + PPME_SYSCALL_EXECVE_19_X, + PPME_SYSCALL_CLONE_20_X, + PPME_SYSCALL_CLONE3_X + }; + supported_codes_any_profile.insert( + supported_codes_any_profile.end(), + supported_codes_fd_profile.begin(), + supported_codes_fd_profile.end() + ); + + int n = 1; for (const auto& profile : behavior_profiles) { std::vector filter_check_fields; @@ -182,26 +223,46 @@ void anomalydetection::parse_init_config(nlohmann::json& config_json) if (profile.contains("fields") && profile.contains("event_codes")) { filter_check_fields = plugin_anomalydetection::utils::get_profile_fields(profile["fields"].get()); + std::ostringstream oss; + bool first_event_code = true; for (const auto& code : profile["event_codes"]) { codes.insert((ppm_event_code)code.get()); + if (!first_event_code) + { + oss << ","; + } + oss << code; + first_event_code = false; } + std::string event_codes_string = oss.str(); + log_error("Behavior profile number (" + std::to_string(n) + ") loaded and applied to event_codes (" + event_codes_string + ") with behavior profile fields (" + profile["fields"].get() + ")"); + /* Some rudimentary initial checks to ensure profiles with %fd fields are applied on fd related events only */ if (profile["fields"].get().find("%fd") != std::string::npos) { for (const auto& code : codes) { - if (std::find(supported_codes_fd_profile .begin(), supported_codes_fd_profile .end(), code) == supported_codes_fd_profile .end()) + if (std::find(supported_codes_fd_profile.begin(), supported_codes_fd_profile.end(), code) == supported_codes_fd_profile.end()) { - log_error("Current behavior profile: " + profile["fields"].get()); - log_error("The above behavior profile contains '%fd' related fields for non fd related event codes, read the docs for help, exiting..."); + log_error("The above behavior profile contains '%fd' related fields but includes non fd related event codes such as code (" + std::to_string(code) + "), which is not allowed. Please refer to the docs for assistance, exiting..."); exit(1); } } } + /* Some rudimentary checks to generally limit the event codes to a subset of supported event codes. */ + for (const auto& code : codes) + { + if (std::find(supported_codes_any_profile.begin(), supported_codes_any_profile.end(), code) == supported_codes_any_profile.end()) + { + log_error("The above behavior profile contains event codes such as code (" + std::to_string(code) + ") that are currently not at all allowed for behavior profiles. Please refer to the docs for assistance, exiting..."); + exit(1); + } + } } m_behavior_profiles_fields.emplace_back(filter_check_fields); m_behavior_profiles_event_codes.emplace_back(std::move(codes)); + n++; } }