From 6a73c83b3c07c8a03b55cee86ff7457722037c79 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 22 Nov 2024 13:49:35 -0700 Subject: [PATCH] ocp-nvme: Add a different formatting for JSON output The current OCP JSON format for the SMART extended log page is not conducive to metric collection via tools like Prometheus. So we add a new output mode that uses all lower case and underscores (instead of spaces). This should help with metric collection. At the same time we clean up some of the field names. We add a new argument (--output-format-version) to allow us to select which output version we want. Documentation updated to reflect this change and mark this as experimental. Fixes #2577. Signed-off-by: Stephen Bates --- Documentation/nvme-ocp-smart-add-log.txt | 10 +- plugins/ocp/ocp-print-json.c | 125 ++++++++++++++++++++++- plugins/ocp/ocp-print-stdout.c | 2 +- plugins/ocp/ocp-print.c | 4 +- plugins/ocp/ocp-print.h | 4 +- plugins/ocp/ocp-smart-extended-log.c | 11 +- 6 files changed, 146 insertions(+), 10 deletions(-) diff --git a/Documentation/nvme-ocp-smart-add-log.txt b/Documentation/nvme-ocp-smart-add-log.txt index 66a55a90ec..97192d6940 100644 --- a/Documentation/nvme-ocp-smart-add-log.txt +++ b/Documentation/nvme-ocp-smart-add-log.txt @@ -9,7 +9,7 @@ compliant device SYNOPSIS -------- [verse] -'nvme ocp smart-add-log' [--output-format= | -o ] +'nvme ocp smart-add-log' [--output-format= | -o ] [--output-format-version=] DESCRIPTION ----------- @@ -22,6 +22,10 @@ device (ex: /dev/nvme0) or block device (ex: /dev/nvme0n1). This will only work on OCP compliant devices supporting this feature. Results for any other device are undefined. +EXPERIMENTAL. The --output-format-version can be set to 2 to generate field names +for the outputs that are easier to process via scripts. Note this is +experimental and the field names are subject to change. + On success it returns 0, error code otherwise. 
OPTIONS @@ -31,6 +35,10 @@ OPTIONS Set the reporting format to 'normal' or 'json'. Only one output format can be used at a time. The default is normal. +--output-format-version=:: + Set the field labels in the reporting format to either '1' + (the original) or '2'. The default is 1. Note this is experimental. + EXAMPLES -------- * Has the program issue a smart-add-log command to retrieve the 0xC0 log page. diff --git a/plugins/ocp/ocp-print-json.c b/plugins/ocp/ocp-print-json.c index e62dfb3ab8..17789cbd44 100644 --- a/plugins/ocp/ocp-print-json.c +++ b/plugins/ocp/ocp-print-json.c @@ -136,7 +136,7 @@ static void json_fw_activation_history(const struct fw_activation_history *fw_hi printf("\n"); } -static void json_smart_extended_log(void *data) +static void json_smart_extended_log_v1(void *data) { struct json_object *root; struct json_object *pmuw; @@ -248,6 +248,129 @@ static void json_smart_extended_log(void *data) json_free_object(root); } +static void json_smart_extended_log_v2(void *data) +{ + struct json_object *root; + struct json_object *pmuw; + struct json_object *pmur; + uint16_t smart_log_ver = 0; + __u8 *log_data = data; + char guid[40]; + + root = json_create_object(); + pmuw = json_create_object(); + pmur = json_create_object(); + + json_object_add_value_uint64(pmuw, "hi", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW + 8] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_uint64(pmuw, "lo", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_object(root, "physical_media_units_written", pmuw); + json_object_add_value_uint64(pmur, "hi", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR + 8] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_uint64(pmur, "lo", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR] & 0xFFFFFFFFFFFFFFFF)); + json_object_add_value_object(root, "physical_media_units_read", pmur); + json_object_add_value_uint64(root, "bad_user_nand_blocks_raw", + 
(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_BUNBR] & 0x0000FFFFFFFFFFFF)); + json_object_add_value_uint(root, "bad_user_nand_blocks_normalized", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BUNBN])); + json_object_add_value_uint64(root, "bad_system_nand_blocks_raw", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_BSNBR] & 0x0000FFFFFFFFFFFF)); + json_object_add_value_uint(root, "bad_system_nand_blocks_normalized", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BSNBN])); + json_object_add_value_uint64(root, "xor_recovery_count", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_XRC])); + json_object_add_value_uint64(root, "uncorrectable_read_errors", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UREC])); + json_object_add_value_uint64(root, "soft_ecc_error_count", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SEEC])); + json_object_add_value_uint(root, "end_to_end_detected_errors", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EEDC])); + json_object_add_value_uint(root, "end_to_end_corrected_errors", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EECE])); + json_object_add_value_uint(root, "system_data_percent_used", + (__u8)log_data[SCAO_SDPU]); + json_object_add_value_uint64(root, "refresh_count", + (uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_RFSC]) & 0x00FFFFFFFFFFFFFF)); + json_object_add_value_uint(root, "max_user_data_erase_count", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MXUDEC])); + json_object_add_value_uint(root, "min_user_data_erase_count", + (uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MNUDEC])); + json_object_add_value_uint(root, "thermal_throttling_events", + (__u8)log_data[SCAO_NTTE]); + json_object_add_value_uint(root, "current_throttling_status", + (__u8)log_data[SCAO_CTS]); + json_object_add_value_uint64(root, "pcie_correctable_errors", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PCEC])); + json_object_add_value_uint(root, "incomplete_shutdowns", + 
(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_ICS])); + json_object_add_value_uint(root, "percent_free_blocks", + (__u8)log_data[SCAO_PFB]); + json_object_add_value_uint(root, "capacitor_health", + (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_CPH])); + json_object_add_value_uint64(root, "unaligned_io", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UIO])); + json_object_add_value_uint64(root, "security_version_number", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SVN])); + json_object_add_value_uint64(root, "nuse_namespace_utilization", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_NUSE])); + json_object_add_value_uint128(root, "plp_start_count", + le128_to_cpu(&log_data[SCAO_PSC])); + json_object_add_value_uint128(root, "endurance_estimate", + le128_to_cpu(&log_data[SCAO_EEST])); + smart_log_ver = (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_LPV]); + + json_object_add_value_uint(root, "log_page_version", smart_log_ver); + + memset((void *)guid, 0, 40); + sprintf((char *)guid, "0x%"PRIx64"%"PRIx64"", + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG + 8]), + (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG])); + json_object_add_value_string(root, "log_page_guid", guid); + + switch (smart_log_ver) { + case 0 ... 1: + break; + default: + case 4: + json_object_add_value_uint(root, "nvme_command_set_errata_version", + (__u8)log_data[SCAO_NCSEV]); + json_object_add_value_uint(root, "lowest_permitted_firmware_revision", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC])); + fallthrough; + case 2 ... 
3: + json_object_add_value_uint(root, "errata_version_field", + (__u8)log_data[SCAO_EVF]); + json_object_add_value_uint(root, "point_version_field", + le16_to_cpu(*(uint16_t *)&log_data[SCAO_PVF])); + json_object_add_value_uint(root, "minor_version_field", + le16_to_cpu(*(uint16_t *)&log_data[SCAO_MIVF])); + json_object_add_value_uint(root, "major_version_field", + (__u8)log_data[SCAO_MAVF]); + json_object_add_value_uint(root, "nvme_base_errata_version", + (__u8)log_data[SCAO_NBEV]); + json_object_add_value_uint(root, "pcie_link_retraining_count", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PLRC])); + json_object_add_value_uint(root, "power_state_change_count", + le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC])); + } + json_print_object(root, NULL); + printf("\n"); + json_free_object(root); +} + +static void json_smart_extended_log(void *data, unsigned int version) +{ + switch (version) { + default: + case 1: + json_smart_extended_log_v1(data); + break; + case 2: + json_smart_extended_log_v2(data); + } +} static void json_telemetry_log(struct ocp_telemetry_parse_options *options) { print_ocp_telemetry_json(options); diff --git a/plugins/ocp/ocp-print-stdout.c b/plugins/ocp/ocp-print-stdout.c index f0bd78f790..f86c1b8428 100644 --- a/plugins/ocp/ocp-print-stdout.c +++ b/plugins/ocp/ocp-print-stdout.c @@ -98,7 +98,7 @@ static void stdout_fw_activation_history(const struct fw_activation_history *fw_ printf("\n"); } -static void stdout_smart_extended_log(void *data) +static void stdout_smart_extended_log(void *data, unsigned int version) { uint16_t smart_log_ver = 0; __u8 *log_data = data; diff --git a/plugins/ocp/ocp-print.c b/plugins/ocp/ocp-print.c index 916c653d4a..eb6c4d137c 100644 --- a/plugins/ocp/ocp-print.c +++ b/plugins/ocp/ocp-print.c @@ -36,9 +36,9 @@ void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_pri ocp_print(fw_act_history, flags, fw_history); } -void ocp_smart_extended_log(void *data, nvme_print_flags_t flags) +void 
ocp_smart_extended_log(void *data, unsigned int version, nvme_print_flags_t flags) { - ocp_print(smart_extended_log, flags, data); + ocp_print(smart_extended_log, flags, data, version); } void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags) diff --git a/plugins/ocp/ocp-print.h b/plugins/ocp/ocp-print.h index 85655b9340..e623cc8355 100644 --- a/plugins/ocp/ocp-print.h +++ b/plugins/ocp/ocp-print.h @@ -10,7 +10,7 @@ struct ocp_print_ops { void (*hwcomp_log)(struct hwcomp_log *log, __u32 id, bool list); void (*fw_act_history)(const struct fw_activation_history *fw_history); - void (*smart_extended_log)(void *data); + void (*smart_extended_log)(void *data, unsigned int version); void (*telemetry_log)(struct ocp_telemetry_parse_options *options); void (*c3_log)(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data); void (*c5_log)(struct nvme_dev *dev, struct unsupported_requirement_log *log_data); @@ -36,7 +36,7 @@ static inline struct ocp_print_ops *ocp_get_json_print_ops(nvme_print_flags_t fl void ocp_show_hwcomp_log(struct hwcomp_log *log, __u32 id, bool list, nvme_print_flags_t flags); void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_print_flags_t flags); -void ocp_smart_extended_log(void *data, nvme_print_flags_t flags); +void ocp_smart_extended_log(void *data, unsigned int version, nvme_print_flags_t flags); void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags); void ocp_c3_log(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data, nvme_print_flags_t flags); diff --git a/plugins/ocp/ocp-smart-extended-log.c b/plugins/ocp/ocp-smart-extended-log.c index 42e77771df..6a1b981289 100644 --- a/plugins/ocp/ocp-smart-extended-log.c +++ b/plugins/ocp/ocp-smart-extended-log.c @@ -27,7 +27,8 @@ static __u8 scao_guid[GUID_LEN] = { 0xC9, 0x14, 0xD5, 0xAF }; -static int get_c0_log_page(struct nvme_dev *dev, char *format) +static int 
get_c0_log_page(struct nvme_dev *dev, char *format, + unsigned int format_version) { nvme_print_flags_t fmt; __u8 *data; @@ -86,7 +87,7 @@ static int get_c0_log_page(struct nvme_dev *dev, char *format) } /* print the data */ - ocp_smart_extended_log(data, fmt); + ocp_smart_extended_log(data, format_version, fmt); } else { fprintf(stderr, "ERROR : OCP : Unable to read C0 data from buffer\n"); } @@ -105,14 +106,17 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd, struct config { char *output_format; + unsigned int output_format_version; }; struct config cfg = { .output_format = "normal", + .output_format_version = 1, }; OPT_ARGS(opts) = { OPT_FMT("output-format", 'o', &cfg.output_format, "output Format: normal|json"), + OPT_UINT("output-format-version", 0, &cfg.output_format_version, "output Format version: 1|2"), OPT_END() }; @@ -120,7 +124,8 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd, if (ret) return ret; - ret = get_c0_log_page(dev, cfg.output_format); + ret = get_c0_log_page(dev, cfg.output_format, + cfg.output_format_version); if (ret) fprintf(stderr, "ERROR : OCP : Failure reading the C0 Log Page, ret = %d\n", ret);