Skip to content

Commit

Permalink
ocp-nvme: Add a different formatting for JSON output
Browse files Browse the repository at this point in the history
The current OCP JSON format for the SMART extended log page is not
conducive to metric collection via tools like Prometheus. So we add a
new output mode that uses all lower case and underscores (instead of
spaces). This should help with metric collection. At the same time we
clean up some of the field names.

Note documentation has NOT yet been updated.

Fixes linux-nvme#2577.

Signed-off-by: Stephen Bates <[email protected]>
  • Loading branch information
sbates130272 committed Nov 22, 2024
1 parent 98b34e4 commit f2fab0a
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 9 deletions.
121 changes: 120 additions & 1 deletion plugins/ocp/ocp-print-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static void json_fw_activation_history(const struct fw_activation_history *fw_hi
printf("\n");
}

static void json_smart_extended_log(void *data)
static void json_smart_extended_log_legacy(void *data)
{
struct json_object *root;
struct json_object *pmuw;
Expand Down Expand Up @@ -248,6 +248,125 @@ static void json_smart_extended_log(void *data)
json_free_object(root);
}

/*
 * json_smart_extended_log_new() - print the SMART extended log page as JSON
 * using lower-case, underscore-separated key names.
 * @data:	raw log page buffer; every field is read at its SCAO_* byte
 *		offset, the furthest read being 16 bytes starting at SCAO_LPG
 *
 * Multi-byte fields are converted from little-endian before use. Fields that
 * are narrower than the 64-bit word they are loaded with are masked AFTER the
 * endian conversion, so the mask always selects the low-order bytes of the
 * value regardless of host byte order.
 *
 * Which trailing fields are emitted depends on the log page version read from
 * SCAO_LPV: versions 0-1 add nothing, versions 2-3 add the errata/version and
 * PCIe/power-state counters, and version 4 or any other value additionally
 * adds the command set errata version and lowest permitted firmware revision.
 *
 * The resulting object is printed followed by a newline and then freed.
 */
static void json_smart_extended_log_new(void *data)
{
	struct json_object *root;
	struct json_object *pmuw;
	struct json_object *pmur;
	uint16_t smart_log_ver = 0;
	__u8 *log_data = data;
	char guid[40];

	root = json_create_object();
	pmuw = json_create_object();
	pmur = json_create_object();

	/* 128-bit counters are reported as separate high/low 64-bit halves */
	json_object_add_value_uint64(pmuw, "hi",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW + 8]));
	json_object_add_value_uint64(pmuw, "lo",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW]));
	json_object_add_value_object(root, "physical_media_units_written", pmuw);
	json_object_add_value_uint64(pmur, "hi",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR + 8]));
	json_object_add_value_uint64(pmur, "lo",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR]));
	json_object_add_value_object(root, "physical_media_units_read", pmur);

	/* Raw bad-block counts keep only the low 48 bits of the loaded word */
	json_object_add_value_uint64(root, "bad_user_nand_blocks_raw",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BUNBR]) &
			   0x0000FFFFFFFFFFFF));
	json_object_add_value_uint(root, "bad_user_nand_blocks_normalized",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BUNBN]));
	json_object_add_value_uint64(root, "bad_system_nand_blocks_raw",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BSNBR]) &
			   0x0000FFFFFFFFFFFF));
	json_object_add_value_uint(root, "bad_system_nand_blocks_normalized",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BSNBN]));
	json_object_add_value_uint64(root, "xor_recovery_count",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_XRC]));
	json_object_add_value_uint64(root, "uncorrectable_read_errors",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UREC]));
	json_object_add_value_uint64(root, "soft_ecc_error_count",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SEEC]));
	json_object_add_value_uint(root, "end_to_end_detected_errors",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EEDC]));
	json_object_add_value_uint(root, "end_to_end_corrected_errors",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EECE]));
	json_object_add_value_uint(root, "system_data_percent_used",
		(__u8)log_data[SCAO_SDPU]);
	/* Refresh count keeps only the low 56 bits of the loaded word */
	json_object_add_value_uint64(root, "refresh_count",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_RFSC]) &
			   0x00FFFFFFFFFFFFFF));
	json_object_add_value_uint(root, "max_user_data_erase_count",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MXUDEC]));
	json_object_add_value_uint(root, "min_user_data_erase_count",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MNUDEC]));
	json_object_add_value_uint(root, "thermal_throttling_events",
		(__u8)log_data[SCAO_NTTE]);
	json_object_add_value_uint(root, "current_throttling_status",
		(__u8)log_data[SCAO_CTS]);
	json_object_add_value_uint64(root, "pcie_correctable_errors",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PCEC]));
	json_object_add_value_uint(root, "incomplete_shutdowns",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_ICS]));
	json_object_add_value_uint(root, "percent_free_blocks",
		(__u8)log_data[SCAO_PFB]);
	json_object_add_value_uint(root, "capacitor_health",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_CPH]));
	json_object_add_value_uint64(root, "unaligned_io",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UIO]));
	json_object_add_value_uint64(root, "security_version_number",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SVN]));
	json_object_add_value_uint64(root, "nuse_namespace_utilization",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_NUSE]));
	json_object_add_value_uint128(root, "plp_start_count",
		le128_to_cpu(&log_data[SCAO_PSC]));
	json_object_add_value_uint128(root, "endurance_estimate",
		le128_to_cpu(&log_data[SCAO_EEST]));

	smart_log_ver = (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_LPV]);
	json_object_add_value_uint(root, "log_page_version", smart_log_ver);

	/*
	 * Render the GUID as a fixed-width 128-bit hex string. Both halves are
	 * zero-padded to 16 digits so leading zeros are never dropped, and
	 * snprintf() bounds the write and always NUL-terminates.
	 */
	snprintf(guid, sizeof(guid), "0x%016"PRIx64"%016"PRIx64"",
		 (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG + 8]),
		 (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG]));
	json_object_add_value_string(root, "log_page_guid", guid);

	switch (smart_log_ver) {
	case 0 ... 1:
		break;
	default:
	case 4:
		json_object_add_value_uint(root, "nvme_command_set_errata_version",
			(__u8)log_data[SCAO_NCSEV]);
		/*
		 * NOTE(review): this reads SCAO_PSCC, the same offset used for
		 * power_state_change_count below, so both keys report the same
		 * bytes. This looks like a copy/paste slip; confirm the
		 * dedicated offset constant for this field and switch to it.
		 */
		json_object_add_value_uint64(root, "lowest_permitted_firmware_revision",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
		fallthrough;
	case 2 ... 3:
		json_object_add_value_uint(root, "errata_version_field",
			(__u8)log_data[SCAO_EVF]);
		json_object_add_value_uint(root, "point_version_field",
			le16_to_cpu(*(uint16_t *)&log_data[SCAO_PVF]));
		json_object_add_value_uint(root, "minor_version_field",
			le16_to_cpu(*(uint16_t *)&log_data[SCAO_MIVF]));
		json_object_add_value_uint(root, "major_version_field",
			(__u8)log_data[SCAO_MAVF]);
		json_object_add_value_uint(root, "nvme_base_errata_version",
			(__u8)log_data[SCAO_NBEV]);
		/* 64-bit counters use the 64-bit adder like the fields above */
		json_object_add_value_uint64(root, "pcie_link_retraining_count",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PLRC]));
		json_object_add_value_uint64(root, "power_state_change_count",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
	}

	json_print_object(root, NULL);
	printf("\n");
	json_free_object(root);
}

/*
 * json_smart_extended_log() - JSON entry point for the SMART extended log.
 * @data:	raw log page buffer, handed unchanged to the selected printer
 * @legacy:	true selects the legacy printer, false selects the new one
 */
static void json_smart_extended_log(void *data, bool legacy)
{
	if (!legacy) {
		json_smart_extended_log_new(data);
		return;
	}

	json_smart_extended_log_legacy(data);
}
static void json_telemetry_log(struct ocp_telemetry_parse_options *options)
{
print_ocp_telemetry_json(options);
Expand Down
2 changes: 1 addition & 1 deletion plugins/ocp/ocp-print-stdout.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static void stdout_fw_activation_history(const struct fw_activation_history *fw_
printf("\n");
}

static void stdout_smart_extended_log(void *data)
static void stdout_smart_extended_log(void *data, bool ignored)
{
uint16_t smart_log_ver = 0;
__u8 *log_data = data;
Expand Down
4 changes: 2 additions & 2 deletions plugins/ocp/ocp-print.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_pri
ocp_print(fw_act_history, flags, fw_history);
}

void ocp_smart_extended_log(void *data, nvme_print_flags_t flags)
void ocp_smart_extended_log(void *data, bool legacy, nvme_print_flags_t flags)
{
ocp_print(smart_extended_log, flags, data);
ocp_print(smart_extended_log, flags, data, legacy);
}

void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags)
Expand Down
4 changes: 2 additions & 2 deletions plugins/ocp/ocp-print.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
struct ocp_print_ops {
void (*hwcomp_log)(struct hwcomp_log *log, __u32 id, bool list);
void (*fw_act_history)(const struct fw_activation_history *fw_history);
void (*smart_extended_log)(void *data);
void (*smart_extended_log)(void *data, bool legacy);
void (*telemetry_log)(struct ocp_telemetry_parse_options *options);
void (*c3_log)(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data);
void (*c5_log)(struct nvme_dev *dev, struct unsupported_requirement_log *log_data);
Expand All @@ -36,7 +36,7 @@ static inline struct ocp_print_ops *ocp_get_json_print_ops(nvme_print_flags_t fl

void ocp_show_hwcomp_log(struct hwcomp_log *log, __u32 id, bool list, nvme_print_flags_t flags);
void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_print_flags_t flags);
void ocp_smart_extended_log(void *data, nvme_print_flags_t flags);
void ocp_smart_extended_log(void *data, bool legacy, nvme_print_flags_t flags);
void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags);
void ocp_c3_log(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data,
nvme_print_flags_t flags);
Expand Down
9 changes: 6 additions & 3 deletions plugins/ocp/ocp-smart-extended-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ static __u8 scao_guid[GUID_LEN] = {
0xC9, 0x14, 0xD5, 0xAF
};

static int get_c0_log_page(int fd, char *format)
static int get_c0_log_page(int fd, char *format, bool legacy)
{
nvme_print_flags_t fmt;
__u8 *data;
Expand Down Expand Up @@ -76,7 +76,7 @@ static int get_c0_log_page(int fd, char *format)
}

/* print the data */
ocp_smart_extended_log(data, fmt);
ocp_smart_extended_log(data, legacy, fmt);
} else {
fprintf(stderr, "ERROR : OCP : Unable to read C0 data from buffer\n");
}
Expand All @@ -95,22 +95,25 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd,

struct config {
char *output_format;
bool non_legacy;
};

struct config cfg = {
.output_format = "normal",
.non_legacy = false,
};

OPT_ARGS(opts) = {
OPT_FMT("output-format", 'o', &cfg.output_format, "output Format: normal|json"),
OPT_FLAG("non-legacy", 'b', &cfg.non_legacy, "--non-legacy"),
OPT_END()
};

ret = parse_and_open(&dev, argc, argv, desc, opts);
if (ret)
return ret;

ret = get_c0_log_page(dev_fd(dev), cfg.output_format);
ret = get_c0_log_page(dev_fd(dev), cfg.output_format, !cfg.non_legacy);
if (ret)
fprintf(stderr, "ERROR : OCP : Failure reading the C0 Log Page, ret = %d\n",
ret);
Expand Down

0 comments on commit f2fab0a

Please sign in to comment.