Skip to content

Commit

Permalink
ocp-nvme: Add a different formatting for JSON output
Browse files Browse the repository at this point in the history
The current OCP JSON format for the SMART extended log page is not
conducive to metric collection via tools like Prometheus. So we add a
new output mode that uses all lower case and underscores (instead of
spaces). This should help with metric collection. At the same time we
clean up some of the field names. We add a new argument
(--output-format-version) to allow us to select which output version
we want. Documentation updated to reflect this change and mark this as
experimental.

Fixes #2577.

Signed-off-by: Stephen Bates <[email protected]>
  • Loading branch information
sbates130272 authored and igaw committed Dec 20, 2024
1 parent f258dca commit 6a73c83
Show file tree
Hide file tree
Showing 6 changed files with 146 additions and 10 deletions.
10 changes: 9 additions & 1 deletion Documentation/nvme-ocp-smart-add-log.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ compliant device
SYNOPSIS
--------
[verse]
'nvme ocp smart-add-log' <device> [--output-format=<fmt> | -o <fmt>]
'nvme ocp smart-add-log' <device> [--output-format=<fmt> | -o <fmt>] [--output-format-version=<version>]

DESCRIPTION
-----------
Expand All @@ -22,6 +22,10 @@ device (ex: /dev/nvme0) or block device (ex: /dev/nvme0n1).
This will only work on OCP compliant devices supporting this feature.
Results for any other device are undefined.

EXPERIMENTAL. The --output-format-version option can be set to 2 to generate
field names for the outputs that are easier to process via scripts. Because
this option is experimental, the field names are subject to change.

On success it returns 0, error code otherwise.

OPTIONS
Expand All @@ -31,6 +35,10 @@ OPTIONS
Set the reporting format to 'normal' or 'json'. Only one output format
can be used at a time. The default is normal.

--output-format-version=<version>::
Set the field labels in the reporting format to either '1'
(the original) or '2'. The default is 1. Note this is experimental.

EXAMPLES
--------
* Has the program issue a smart-add-log command to retrieve the 0xC0 log page.
Expand Down
125 changes: 124 additions & 1 deletion plugins/ocp/ocp-print-json.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ static void json_fw_activation_history(const struct fw_activation_history *fw_hi
printf("\n");
}

static void json_smart_extended_log(void *data)
static void json_smart_extended_log_v1(void *data)
{
struct json_object *root;
struct json_object *pmuw;
Expand Down Expand Up @@ -248,6 +248,129 @@ static void json_smart_extended_log(void *data)
json_free_object(root);
}

/*
 * Print the SMART extended log page (0xC0) as JSON using the version 2
 * field names: all lower case, with underscores instead of spaces.
 *
 * @data: raw log page buffer; every field is read at its SCAO_* byte offset.
 *
 * The log page version stored at SCAO_LPV selects the optional fields:
 * versions 0-1 emit none, versions 2-3 emit the version/errata and link
 * counters, and version 4 or any higher value additionally emits the
 * command set errata version and lowest permitted firmware revision.
 * The object is printed to stdout followed by a newline and then freed.
 */
static void json_smart_extended_log_v2(void *data)
{
	struct json_object *root;
	struct json_object *pmuw;
	struct json_object *pmur;
	uint16_t smart_log_ver = 0;
	__u8 *log_data = data;
	char guid[40];

	root = json_create_object();
	pmuw = json_create_object();
	pmur = json_create_object();

	/* 128-bit counters are reported as two 64-bit halves. */
	json_object_add_value_uint64(pmuw, "hi",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW + 8]));
	json_object_add_value_uint64(pmuw, "lo",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUW]));
	json_object_add_value_object(root, "physical_media_units_written", pmuw);
	json_object_add_value_uint64(pmur, "hi",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR + 8]));
	json_object_add_value_uint64(pmur, "lo",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PMUR]));
	json_object_add_value_object(root, "physical_media_units_read", pmur);

	/*
	 * The raw bad block counts keep only the low 48 bits. The mask is
	 * applied after the byte-order conversion so that the same bytes are
	 * kept regardless of host endianness.
	 */
	json_object_add_value_uint64(root, "bad_user_nand_blocks_raw",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BUNBR]) & 0x0000FFFFFFFFFFFF));
	json_object_add_value_uint(root, "bad_user_nand_blocks_normalized",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BUNBN]));
	json_object_add_value_uint64(root, "bad_system_nand_blocks_raw",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_BSNBR]) & 0x0000FFFFFFFFFFFF));
	json_object_add_value_uint(root, "bad_system_nand_blocks_normalized",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_BSNBN]));
	json_object_add_value_uint64(root, "xor_recovery_count",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_XRC]));
	json_object_add_value_uint64(root, "uncorrectable_read_errors",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UREC]));
	json_object_add_value_uint64(root, "soft_ecc_error_count",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SEEC]));
	json_object_add_value_uint(root, "end_to_end_detected_errors",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EEDC]));
	json_object_add_value_uint(root, "end_to_end_corrected_errors",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_EECE]));
	json_object_add_value_uint(root, "system_data_percent_used",
		(__u8)log_data[SCAO_SDPU]);
	/* The refresh count keeps only the low 56 bits. */
	json_object_add_value_uint64(root, "refresh_count",
		(uint64_t)(le64_to_cpu(*(uint64_t *)&log_data[SCAO_RFSC]) & 0x00FFFFFFFFFFFFFF));
	json_object_add_value_uint(root, "max_user_data_erase_count",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MXUDEC]));
	json_object_add_value_uint(root, "min_user_data_erase_count",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_MNUDEC]));
	json_object_add_value_uint(root, "thermal_throttling_events",
		(__u8)log_data[SCAO_NTTE]);
	json_object_add_value_uint(root, "current_throttling_status",
		(__u8)log_data[SCAO_CTS]);
	json_object_add_value_uint64(root, "pcie_correctable_errors",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PCEC]));
	json_object_add_value_uint(root, "incomplete_shutdowns",
		(uint32_t)le32_to_cpu(*(uint32_t *)&log_data[SCAO_ICS]));
	json_object_add_value_uint(root, "percent_free_blocks",
		(__u8)log_data[SCAO_PFB]);
	json_object_add_value_uint(root, "capacitor_health",
		(uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_CPH]));
	json_object_add_value_uint64(root, "unaligned_io",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_UIO]));
	json_object_add_value_uint64(root, "security_version_number",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_SVN]));
	json_object_add_value_uint64(root, "nuse_namespace_utilization",
		(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_NUSE]));
	json_object_add_value_uint128(root, "plp_start_count",
		le128_to_cpu(&log_data[SCAO_PSC]));
	json_object_add_value_uint128(root, "endurance_estimate",
		le128_to_cpu(&log_data[SCAO_EEST]));

	smart_log_ver = (uint16_t)le16_to_cpu(*(uint16_t *)&log_data[SCAO_LPV]);
	json_object_add_value_uint(root, "log_page_version", smart_log_ver);

	/*
	 * Render the GUID as one fixed-width hex string, high half first.
	 * Each half is zero padded to 16 digits so leading zeros are not
	 * dropped; "0x" plus 32 digits plus the terminator fits in guid[].
	 */
	snprintf(guid, sizeof(guid), "0x%016"PRIx64"%016"PRIx64"",
		 (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG + 8]),
		 (uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_LPG]));
	json_object_add_value_string(root, "log_page_guid", guid);

	switch (smart_log_ver) {
	case 0 ... 1:
		break;
	default:
	case 4:
		json_object_add_value_uint(root, "nvme_command_set_errata_version",
			(__u8)log_data[SCAO_NCSEV]);
		/*
		 * NOTE(review): this reads SCAO_PSCC, the same offset used for
		 * "power_state_change_count" below, so both keys report the
		 * same value. Presumably a dedicated offset for the lowest
		 * permitted firmware revision was intended; confirm against
		 * the SCAO_* definitions.
		 */
		json_object_add_value_uint64(root, "lowest_permitted_firmware_revision",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
		fallthrough;
	case 2 ... 3:
		json_object_add_value_uint(root, "errata_version_field",
			(__u8)log_data[SCAO_EVF]);
		json_object_add_value_uint(root, "point_version_field",
			le16_to_cpu(*(uint16_t *)&log_data[SCAO_PVF]));
		json_object_add_value_uint(root, "minor_version_field",
			le16_to_cpu(*(uint16_t *)&log_data[SCAO_MIVF]));
		json_object_add_value_uint(root, "major_version_field",
			(__u8)log_data[SCAO_MAVF]);
		json_object_add_value_uint(root, "nvme_base_errata_version",
			(__u8)log_data[SCAO_NBEV]);
		/* 64-bit counters use the 64-bit adder, like the fields above. */
		json_object_add_value_uint64(root, "pcie_link_retraining_count",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PLRC]));
		json_object_add_value_uint64(root, "power_state_change_count",
			(uint64_t)le64_to_cpu(*(uint64_t *)&log_data[SCAO_PSCC]));
	}

	json_print_object(root, NULL);
	printf("\n");
	json_free_object(root);
}

/*
 * Dispatch the SMART extended log JSON output by output format version.
 *
 * @data:    raw log page buffer, passed through unchanged to the printer.
 * @version: requested output format version; 2 selects the v2 printer,
 *           every other value selects the v1 printer.
 */
static void json_smart_extended_log(void *data, unsigned int version)
{
	if (version == 2) {
		json_smart_extended_log_v2(data);
		return;
	}

	/* Version 1 and any unrecognized version use the original layout. */
	json_smart_extended_log_v1(data);
}
static void json_telemetry_log(struct ocp_telemetry_parse_options *options)
{
print_ocp_telemetry_json(options);
Expand Down
2 changes: 1 addition & 1 deletion plugins/ocp/ocp-print-stdout.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ static void stdout_fw_activation_history(const struct fw_activation_history *fw_
printf("\n");
}

static void stdout_smart_extended_log(void *data)
static void stdout_smart_extended_log(void *data, unsigned int version)
{
uint16_t smart_log_ver = 0;
__u8 *log_data = data;
Expand Down
4 changes: 2 additions & 2 deletions plugins/ocp/ocp-print.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_pri
ocp_print(fw_act_history, flags, fw_history);
}

void ocp_smart_extended_log(void *data, nvme_print_flags_t flags)
void ocp_smart_extended_log(void *data, unsigned int version, nvme_print_flags_t flags)
{
ocp_print(smart_extended_log, flags, data);
ocp_print(smart_extended_log, flags, data, version);
}

void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags)
Expand Down
4 changes: 2 additions & 2 deletions plugins/ocp/ocp-print.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
struct ocp_print_ops {
void (*hwcomp_log)(struct hwcomp_log *log, __u32 id, bool list);
void (*fw_act_history)(const struct fw_activation_history *fw_history);
void (*smart_extended_log)(void *data);
void (*smart_extended_log)(void *data, unsigned int version);
void (*telemetry_log)(struct ocp_telemetry_parse_options *options);
void (*c3_log)(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data);
void (*c5_log)(struct nvme_dev *dev, struct unsupported_requirement_log *log_data);
Expand All @@ -36,7 +36,7 @@ static inline struct ocp_print_ops *ocp_get_json_print_ops(nvme_print_flags_t fl

void ocp_show_hwcomp_log(struct hwcomp_log *log, __u32 id, bool list, nvme_print_flags_t flags);
void ocp_fw_act_history(const struct fw_activation_history *fw_history, nvme_print_flags_t flags);
void ocp_smart_extended_log(void *data, nvme_print_flags_t flags);
void ocp_smart_extended_log(void *data, unsigned int version, nvme_print_flags_t flags);
void ocp_show_telemetry_log(struct ocp_telemetry_parse_options *options, nvme_print_flags_t flags);
void ocp_c3_log(struct nvme_dev *dev, struct ssd_latency_monitor_log *log_data,
nvme_print_flags_t flags);
Expand Down
11 changes: 8 additions & 3 deletions plugins/ocp/ocp-smart-extended-log.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ static __u8 scao_guid[GUID_LEN] = {
0xC9, 0x14, 0xD5, 0xAF
};

static int get_c0_log_page(struct nvme_dev *dev, char *format)
static int get_c0_log_page(struct nvme_dev *dev, char *format,
unsigned int format_version)
{
nvme_print_flags_t fmt;
__u8 *data;
Expand Down Expand Up @@ -86,7 +87,7 @@ static int get_c0_log_page(struct nvme_dev *dev, char *format)
}

/* print the data */
ocp_smart_extended_log(data, fmt);
ocp_smart_extended_log(data, format_version, fmt);
} else {
fprintf(stderr, "ERROR : OCP : Unable to read C0 data from buffer\n");
}
Expand All @@ -105,22 +106,26 @@ int ocp_smart_add_log(int argc, char **argv, struct command *cmd,

struct config {
char *output_format;
unsigned int output_format_version;
};

struct config cfg = {
.output_format = "normal",
.output_format_version = 1,
};

OPT_ARGS(opts) = {
OPT_FMT("output-format", 'o', &cfg.output_format, "output Format: normal|json"),
OPT_UINT("output-format-version", 0, &cfg.output_format_version, "output Format version: 1|2"),
OPT_END()
};

ret = parse_and_open(&dev, argc, argv, desc, opts);
if (ret)
return ret;

ret = get_c0_log_page(dev, cfg.output_format);
ret = get_c0_log_page(dev, cfg.output_format,
cfg.output_format_version);
if (ret)
fprintf(stderr, "ERROR : OCP : Failure reading the C0 Log Page, ret = %d\n",
ret);
Expand Down

0 comments on commit 6a73c83

Please sign in to comment.