From f2fab0a4d693bddfe814d9d0923d68e63f828f58 Mon Sep 17 00:00:00 2001
From: Stephen Bates
Date: Fri, 22 Nov 2024 13:49:35 -0700
Subject: [PATCH] ocp-nvme: Add a different formatting for JSON output

The current OCP JSON format for the SMART extended log page is not
conducive to metric collection via tools like Prometheus. So we add a
new output mode that uses all lower case and underscores (instead of
spaces). This should help with metric collection. At the same time we
clean up some of the field names.

Note documentation has NOT yet been updated.

Fixes #2577.

Signed-off-by: Stephen Bates
---
 plugins/ocp/ocp-print-json.c         | 134 ++++++++++++++++++++++++++-
 plugins/ocp/ocp-print-stdout.c       |   2 +-
 plugins/ocp/ocp-print.c              |   4 +-
 plugins/ocp/ocp-print.h              |   4 +-
 plugins/ocp/ocp-smart-extended-log.c |   9 +-
 5 files changed, 144 insertions(+), 9 deletions(-)

diff --git a/plugins/ocp/ocp-print-json.c b/plugins/ocp/ocp-print-json.c
index d32ed99180..61c732b6ee 100644
--- a/plugins/ocp/ocp-print-json.c
+++ b/plugins/ocp/ocp-print-json.c
@@ -136,7 +136,7 @@ static void json_fw_activation_history(const struct fw_activation_history *fw_hi
 	printf("\n");
 }
 
-static void json_smart_extended_log(void *data)
+static void json_smart_extended_log_legacy(void *data)
 {
 	struct json_object *root;
 	struct json_object *pmuw;
@@ -248,6 +248,138 @@ static void json_smart_extended_log(void *data)
 	json_free_object(root);
 }
 
+/*
+ * Print the SMART extended log page as JSON with lower-case,
+ * underscore-separated keys (the non-legacy key style).
+ *
+ * @data: raw log page buffer; fields are read at their SCAO_* byte offsets.
+ */
+static void json_smart_extended_log_new(void *data)
+{
+	struct json_object *root;
+	struct json_object *pmuw;
+	struct json_object *pmur;
+	uint16_t smart_log_ver = 0;
+	__u8 *log_data = data;
+	char guid[40];
+
+	root = json_create_object();
+	pmuw = json_create_object();
+	pmur = json_create_object();
+
+	json_object_add_value_uint64(pmuw, "hi",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW + 8]));
+	json_object_add_value_uint64(pmuw, "lo",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW]));
+	json_object_add_value_object(root, "physical_media_units_written", pmuw);
+	json_object_add_value_uint64(pmur, "hi",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR + 8]));
+	json_object_add_value_uint64(pmur, "lo",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR]));
+	json_object_add_value_object(root, "physical_media_units_read", pmur);
+	/* 48-bit raw counts: mask after le64_to_cpu() so host endianness does not matter */
+	json_object_add_value_uint64(root, "bad_user_nand_blocks_raw",
+		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BUNBR]) & 0x0000FFFFFFFFFFFF));
+	json_object_add_value_uint(root, "bad_user_nand_blocks_normalized",
+		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BUNBN]));
+	json_object_add_value_uint64(root, "bad_system_nand_blocks_raw",
+		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BSNBR]) & 0x0000FFFFFFFFFFFF));
+	json_object_add_value_uint(root, "bad_system_nand_blocks_normalized",
+		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BSNBN]));
+	json_object_add_value_uint64(root, "xor_recovery_count",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_XRC]));
+	json_object_add_value_uint64(root, "uncorrectable_read_errors",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UREC]));
+	json_object_add_value_uint64(root, "soft_ecc_error_count",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SEEC]));
+	json_object_add_value_uint(root, "end_to_end_detected_errors",
+		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EEDC]));
+	json_object_add_value_uint(root, "end_to_end_corrected_errors",
+		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EECE]));
+	json_object_add_value_uint(root, "system_data_percent_used",
+		(__u8)log_data[SCAO_SDPU]);
+	json_object_add_value_uint64(root, "refresh_count",
+		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_RFSC]) & 0x00FFFFFFFFFFFFFF));
+	json_object_add_value_uint(root, "max_user_data_erase_count",
+		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MXUDEC]));
+	json_object_add_value_uint(root, "min_user_data_erase_count",
+		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MNUDEC]));
+	json_object_add_value_uint(root, "thermal_throttling_events",
+		(__u8)log_data[SCAO_NTTE]);
+	json_object_add_value_uint(root, "current_throttling_status",
+		(__u8)log_data[SCAO_CTS]);
+	json_object_add_value_uint64(root, "pcie_correctable_errors",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PCEC]));
+	json_object_add_value_uint(root, "incomplete_shutdowns",
+		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_ICS]));
+	json_object_add_value_uint(root, "percent_free_blocks",
+		(__u8)log_data[SCAO_PFB]);
+	json_object_add_value_uint(root, "capacitor_health",
+		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_CPH]));
+	json_object_add_value_uint64(root, "unaligned_io",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UIO]));
+	json_object_add_value_uint64(root, "security_version_number",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SVN]));
+	json_object_add_value_uint64(root, "nuse_namespace_utilization",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_NUSE]));
+	json_object_add_value_uint128(root, "plp_start_count",
+		le128_to_cpu(&log_data[SCAO_PSC]));
+	json_object_add_value_uint128(root, "endurance_estimate",
+		le128_to_cpu(&log_data[SCAO_EEST]));
+	smart_log_ver = (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_LPV]);
+
+	json_object_add_value_uint(root, "log_page_version", smart_log_ver);
+
+	/* Zero-pad both halves so the GUID is always 32 hex digits; snprintf NUL-terminates */
+	snprintf(guid, sizeof(guid), "0x%016"PRIx64"%016"PRIx64"",
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG + 8]),
+		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG]));
+	json_object_add_value_string(root, "log_page_guid", guid);
+
+	switch (smart_log_ver) {
+	case 0 ... 1:
+		break;
+	default:
+	case 4:
+		json_object_add_value_uint(root, "nvme_command_set_errata_version",
+			(__u8)log_data[SCAO_NCSEV]);
+		/*
+		 * NOTE(review): this reads SCAO_PSCC, the same offset reported as
+		 * power_state_change_count below; confirm the intended offset.
+		 */
+		json_object_add_value_uint64(root, "lowest_permitted_firmware_revision",
+			le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
+		fallthrough;
+	case 2 ... 3:
+		json_object_add_value_uint(root, "errata_version_field",
+			(__u8)log_data[SCAO_EVF]);
+		json_object_add_value_uint(root, "point_version_field",
+			le16_to_cpu(*(uint16_t *)&log_data[SCAO_PVF]));
+		json_object_add_value_uint(root, "minor_version_field",
+			le16_to_cpu(*(uint16_t *)&log_data[SCAO_MIVF]));
+		json_object_add_value_uint(root, "major_version_field",
+			(__u8)log_data[SCAO_MAVF]);
+		json_object_add_value_uint(root, "nvme_base_errata_version",
+			(__u8)log_data[SCAO_NBEV]);
+		json_object_add_value_uint64(root, "pcie_link_retraining_count",
+			le64_to_cpu(*(uint64_t *)&log_data[SCAO_PLRC]));
+		json_object_add_value_uint64(root, "power_state_change_count",
+			le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
+	}
+	json_print_object(root, NULL);
+	printf("\n");
+	json_free_object(root);
+}
+
+/* Pick the JSON key style: the original keys when @legacy is true, else the new ones */
+static void json_smart_extended_log(void *data, bool legacy)
+{
+	if (legacy)
+		json_smart_extended_log_legacy(data);
+	else
+		json_smart_extended_log_new(data);
+}
+
 static void json_telemetry_log(struct ocp_telemetry_parse_options *options)
 {
 	print_ocp_telemetry_json(options);
diff --git a/plugins/ocp/ocp-print-stdout.c b/plugins/ocp/ocp-print-stdout.c
index 1de237d741..7c4d27ec63 100644
--- a/plugins/ocp/ocp-print-stdout.c
+++ b/plugins/ocp/ocp-print-stdout.c
@@ -98,7 +98,7 @@ static void stdout_fw_activation_history(const struct fw_activation_history *fw_
 	printf("\n");
 }
 
-static void stdout_smart_extended_log(void *data)
+static void stdout_smart_extended_log(void *data, bool ignored)
 {
 	uint16_t smart_log_ver = 0;
 	__u8 *log_data = data;
diff --git a/plugins/ocp/ocp-print.c b/plugins/ocp/ocp-print.c
index 916c653d4a..62e6a983c0 100644
--- a/plugins/ocp/ocp-print.c
+++ b/plugins/ocp/ocp-print.c
@@ -36,9 +36,9 @@ void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_pri
 	ocp_print(fw_act_history, flags, fw_history);
 }
 
-void ocp_smart_extended_log(void *data, nvme_print_flags_t flags)
+void ocp_smart_extended_log(void *data, bool legacy, nvme_print_flags_t flags)
 {
-	ocp_print(smart_extended_log, flags, data);
+	ocp_print(smart_extended_log, flags, data, legacy);
 }
 
 void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags)
diff --git a/plugins/ocp/ocp-print.h b/plugins/ocp/ocp-print.h
index 85655b9340..e2fe8ed912 100644
--- a/plugins/ocp/ocp-print.h
+++ b/plugins/ocp/ocp-print.h
@@ -10,7 +10,7 @@
 struct ocp_print_ops {
 	void (*hwcomp_log)(struct hwcomp_log *log, __u32 id, bool list);
 	void (*fw_act_history)(const struct fw_activation_history *fw_history);
-	void (*smart_extended_log)(void *data);
+	void (*smart_extended_log)(void *data, bool legacy);
 	void (*telemetry_log)(struct ocp_telemetry_parse_options *options);
 	void (*c3_log)(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data);
 	void (*c5_log)(struct nvme_dev *dev, struct unsupported_requirement_log *log_data);
@@ -36,7 +36,7 @@ static inline struct ocp_print_ops *ocp_get_json_print_ops(nvme_print_flags_t fl
 
 void ocp_show_hwcomp_log(struct hwcomp_log *log, __u32 id, bool list, nvme_print_flags_t flags);
 void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_print_flags_t flags);
-void ocp_smart_extended_log(void *data, nvme_print_flags_t flags);
+void ocp_smart_extended_log(void *data, bool legacy, nvme_print_flags_t flags);
 void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags);
 void ocp_c3_log(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data,
 		nvme_print_flags_t flags);
diff --git a/plugins/ocp/ocp-smart-extended-log.c b/plugins/ocp/ocp-smart-extended-log.c
index 5f84191efa..60035954ae 100644
--- a/plugins/ocp/ocp-smart-extended-log.c
+++ b/plugins/ocp/ocp-smart-extended-log.c
@@ -26,7 +26,7 @@ static __u8 scao_guid[GUID_LEN] = {
 	0xC9, 0x14, 0xD5, 0xAF
 };
 
-static int get_c0_log_page(int fd, char *format)
+static int get_c0_log_page(int fd, char *format, bool legacy)
 {
 	nvme_print_flags_t fmt;
 	__u8 *data;
@@ -76,7 +76,7 @@ static int get_c0_log_page(int fd, char *format)
 		}
 
 		/* print the data */
-		ocp_smart_extended_log(data, fmt);
+		ocp_smart_extended_log(data, legacy, fmt);
 	} else {
 		fprintf(stderr, "ERROR : OCP : Unable to read C0 data from buffer\n");
 	}
@@ -95,14 +95,17 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd,
 
 	struct config {
 		char *output_format;
+		bool non_legacy;
 	};
 
 	struct config cfg = {
 		.output_format = "normal",
+		.non_legacy = false,
 	};
 
 	OPT_ARGS(opts) = {
 		OPT_FMT("output-format", 'o', &cfg.output_format, "output Format: normal|json"),
+		OPT_FLAG("non-legacy", 'b', &cfg.non_legacy, "use lower-case, underscore-separated JSON key names (JSON output only)"),
 		OPT_END()
 	};
 
@@ -110,7 +113,7 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd,
 	if (ret)
 		return ret;
 
-	ret = get_c0_log_page(dev_fd(dev), cfg.output_format);
+	ret = get_c0_log_page(dev_fd(dev), cfg.output_format, !cfg.non_legacy);
 	if (ret)
 		fprintf(stderr, "ERROR : OCP : Failure reading the C0 Log Page, ret = %d\n",
 			ret);