From 8aff52a26a37737bb91c34c899fd5ae5f50794ad Mon Sep 17 00:00:00 2001 From: hardikl Date: Tue, 19 Nov 2024 15:40:51 +0530 Subject: [PATCH 01/13] feat: Tracking cluster image update progress via Rest --- conf/rest/9.6.0/clusterupdate.yaml | 16 ++++++++++++++++ conf/rest/default.yaml | 3 ++- 2 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 conf/rest/9.6.0/clusterupdate.yaml diff --git a/conf/rest/9.6.0/clusterupdate.yaml b/conf/rest/9.6.0/clusterupdate.yaml new file mode 100644 index 000000000..dba72b979 --- /dev/null +++ b/conf/rest/9.6.0/clusterupdate.yaml @@ -0,0 +1,16 @@ + +name: ClusterUpdate +query: api/private/cli/cluster/image/show-update-progress +object: cluster_update + +counters: + - ^^ndu_phase => phase + - ^^phase_description => phase_name + - ^^phase_status => status + +export_options: + instance_keys: + - phase + - status + instance_labels: + - phase_name diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml index 495451a07..f1b86f420 100644 --- a/conf/rest/default.yaml +++ b/conf/rest/default.yaml @@ -3,7 +3,7 @@ collector: Rest schedule: - counter: 24h # This handles cases such as cluster upgrades or collector cache updates. - - data: 3m + - data: 1m # See https://github.com/NetApp/harvest/blob/main/docs/architecture/rest-strategy.md # for details on how Harvest handles the ONTAP transition from ZAPI to REST. 
@@ -16,6 +16,7 @@ objects: # CIFSShare: cifs_share.yaml CloudTarget: cloud_target.yaml ClusterPeer: clusterpeer.yaml + ClusterUpdate: clusterupdate.yaml Disk: disk.yaml EmsDestination: ems_destination.yaml # ExportRule: exports.yaml From dfa366a4d88e38e2ddfb3bb1c4a8b0207485e6c2 Mon Sep 17 00:00:00 2001 From: hardikl Date: Tue, 19 Nov 2024 15:41:48 +0530 Subject: [PATCH 02/13] feat: Tracking cluster image update progress via Rest --- conf/rest/default.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml index f1b86f420..27b3c31a1 100644 --- a/conf/rest/default.yaml +++ b/conf/rest/default.yaml @@ -3,7 +3,7 @@ collector: Rest schedule: - counter: 24h # This handles cases such as cluster upgrades or collector cache updates. - - data: 1m + - data: 3m # See https://github.com/NetApp/harvest/blob/main/docs/architecture/rest-strategy.md # for details on how Harvest handles the ONTAP transition from ZAPI to REST. From 7bd59fe8f71ec90a339b268fd2e318379ded5691 Mon Sep 17 00:00:00 2001 From: hardikl Date: Tue, 19 Nov 2024 17:59:22 +0530 Subject: [PATCH 03/13] feat: Adding plugin to track cluster image update --- .../plugins/clusterupdate/clusterupdate.go | 92 +++++++++++++++++++ cmd/collectors/rest/rest.go | 3 + conf/rest/9.6.0/clusterupdate.yaml | 16 ++-- 3 files changed, 101 insertions(+), 10 deletions(-) create mode 100644 cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go diff --git a/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go b/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go new file mode 100644 index 000000000..b002ccced --- /dev/null +++ b/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go @@ -0,0 +1,92 @@ +package clusterupdate + +import ( + "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/netapp/harvest/v2/pkg/slogx" + 
"github.com/netapp/harvest/v2/pkg/tree/node" + "github.com/netapp/harvest/v2/pkg/util" + "github.com/tidwall/gjson" + "log/slog" +) + +type ClusterUpdate struct { + *plugin.AbstractPlugin + data *matrix.Matrix +} + +func New(p *plugin.AbstractPlugin) plugin.Plugin { + return &ClusterUpdate{AbstractPlugin: p} +} + +func (c *ClusterUpdate) Init(conf.Remote) error { + if err := c.InitAbc(); err != nil { + return err + } + + c.data = matrix.New(c.Parent+".ClusterUpdate", "cluster_update", "cluster_update") + exportOptions := node.NewS("export_options") + instanceKeys := exportOptions.NewChildS("instance_keys", "") + instanceKeys.NewChildS("", "phase") + instanceKeys.NewChildS("", "state") + instanceKeys.NewChildS("", "node") + c.data.SetExportOptions(exportOptions) + + if _, err := c.data.NewMetricFloat64("status", "status"); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", "status")) + return err + } + + return nil +} + +func (c *ClusterUpdate) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *util.Metadata, error) { + var ( + clusterUpdateInstance *matrix.Instance + key string + err error + ) + // Purge and reset data + c.data.PurgeInstances() + c.data.Reset() + + // Set all global labels + data := dataMap[c.Object] + c.data.SetGlobalLabels(data.GetGlobalLabels()) + + for _, instance := range data.GetInstances() { + instance.SetExportable(false) + updateDetails := instance.GetLabel("update_details") + updateDetailsJSON := gjson.Result{Type: gjson.JSON, Raw: "[" + updateDetails + "]"} + for _, updateDetail := range updateDetailsJSON.Array() { + phase := updateDetail.Get("phase").String() + state := updateDetail.Get("state").String() + nodeName := updateDetail.Get("node.name").String() + key = phase + state + nodeName + + if clusterUpdateInstance, err = c.data.NewInstance(key); err != nil { + c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) + continue + } + 
clusterUpdateInstance.SetLabel("node", nodeName) + clusterUpdateInstance.SetLabel("state", state) + clusterUpdateInstance.SetLabel("phase", phase) + + // populate numeric data + value := 0.0 + if state == "completed" { + value = 1.0 + } + + met := c.data.GetMetric("status") + if err := met.SetValueFloat64(clusterUpdateInstance, value); err != nil { + c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) + } else { + c.SLogger.Debug("added value", slog.Float64("value", value)) + } + } + } + + return []*matrix.Matrix{c.data}, nil, nil +} diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index b723bb1bc..de92ae737 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -7,6 +7,7 @@ import ( "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/aggregate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/certificate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/cluster" + "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/clusterupdate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/disk" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/health" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/metroclustercheck" @@ -487,6 +488,8 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin return aggregate.New(abc) case "Cluster": return cluster.New(abc) + case "ClusterUpdate": + return clusterupdate.New(abc) case "Disk": return disk.New(abc) case "Health": diff --git a/conf/rest/9.6.0/clusterupdate.yaml b/conf/rest/9.6.0/clusterupdate.yaml index dba72b979..386e27351 100644 --- a/conf/rest/9.6.0/clusterupdate.yaml +++ b/conf/rest/9.6.0/clusterupdate.yaml @@ -1,16 +1,12 @@ name: ClusterUpdate -query: api/private/cli/cluster/image/show-update-progress +query: api/cluster/software object: cluster_update counters: - - ^^ndu_phase => phase - - ^^phase_description => phase_name - - ^^phase_status => status + - 
^update_details => update_details + +plugins: + - ClusterUpdate + -export_options: - instance_keys: - - phase - - status - instance_labels: - - phase_name From 063f1062f87ae58e60b9cc43e53a66bafd767f38 Mon Sep 17 00:00:00 2001 From: hardikl Date: Thu, 21 Nov 2024 15:02:47 +0530 Subject: [PATCH 04/13] feat: Adding status and validation metrics in cluster software --- .../clustersoftware/clustersoftware.go | 255 ++++++++++++++++++ .../plugins/clusterupdate/clusterupdate.go | 92 ------- cmd/collectors/rest/rest.go | 6 +- conf/rest/9.6.0/clustersoftware.yaml | 14 + conf/rest/9.6.0/clusterupdate.yaml | 12 - conf/rest/default.yaml | 2 +- 6 files changed, 273 insertions(+), 108 deletions(-) create mode 100644 cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go delete mode 100644 cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go create mode 100644 conf/rest/9.6.0/clustersoftware.yaml delete mode 100644 conf/rest/9.6.0/clusterupdate.yaml diff --git a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go new file mode 100644 index 000000000..54c9a0d61 --- /dev/null +++ b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go @@ -0,0 +1,255 @@ +package clustersoftware + +import ( + "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/netapp/harvest/v2/pkg/slogx" + "github.com/netapp/harvest/v2/pkg/tree/node" + "github.com/netapp/harvest/v2/pkg/util" + "github.com/tidwall/gjson" + "log/slog" +) + +const updateMatrix = "cluster_software_update" +const StatusMatrix = "cluster_software_status" +const validationMatrix = "cluster_software_validation" +const labels = "labels" + +type ClusterSoftware struct { + *plugin.AbstractPlugin + data map[string]*matrix.Matrix +} + +func New(p *plugin.AbstractPlugin) plugin.Plugin { + return &ClusterSoftware{AbstractPlugin: p} +} + +func (c 
*ClusterSoftware) Init(conf.Remote) error { + if err := c.InitAbc(); err != nil { + return err + } + + c.data = make(map[string]*matrix.Matrix) + if err := c.createUpdateMetrics(); err != nil { + return err + } + if err := c.createStatusMetrics(); err != nil { + return err + } + if err := c.createValidationMetrics(); err != nil { + return err + } + + return nil +} + +func (c *ClusterSoftware) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *util.Metadata, error) { + data := dataMap[c.Object] + globalLabels := data.GetGlobalLabels() + + for _, instance := range data.GetInstances() { + instance.SetExportable(false) + // generate update details metrics + updateDetails := instance.GetLabel("update_details") + updateDetailsJSON := gjson.Result{Type: gjson.JSON, Raw: "[" + updateDetails + "]"} + c.handleUpdateDetails(updateDetailsJSON, globalLabels) + + // generate status details metrics + statusDetails := instance.GetLabel("status_details") + statusDetailsJSON := gjson.Result{Type: gjson.JSON, Raw: "[" + statusDetails + "]"} + c.handleStatusDetails(statusDetailsJSON, globalLabels) + + // generate validation results metrics + validationResults := instance.GetLabel("validation_results") + validationResultsJSON := gjson.Result{Type: gjson.JSON, Raw: "[" + validationResults + "]"} + c.handleValidationDetails(validationResultsJSON, globalLabels) + } + + softwareMetrics := make([]*matrix.Matrix, 0, len(c.data)) + for _, val := range c.data { + softwareMetrics = append(softwareMetrics, val) + } + + return softwareMetrics, nil, nil +} + +func (c *ClusterSoftware) createUpdateMetrics() error { + mat := matrix.New(c.Parent+".ClusterSoftware", updateMatrix, updateMatrix) + exportOptions := node.NewS("export_options") + instanceKeys := exportOptions.NewChildS("instance_keys", "") + instanceKeys.NewChildS("", "phase") + instanceKeys.NewChildS("", "state") + instanceKeys.NewChildS("", "node") + + mat.SetExportOptions(exportOptions) + + if _, err := mat.NewMetricFloat64(labels, 
labels); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + return err + } + + c.data[updateMatrix] = mat + return nil +} + +func (c *ClusterSoftware) createStatusMetrics() error { + mat := matrix.New(c.Parent+".ClusterUpdate", StatusMatrix, StatusMatrix) + exportOptions := node.NewS("export_options") + instanceKeys := exportOptions.NewChildS("instance_keys", "") + instanceKeys.NewChildS("", "state") + instanceKeys.NewChildS("", "node") + instanceKeys.NewChildS("", "name") + + mat.SetExportOptions(exportOptions) + + if _, err := mat.NewMetricFloat64(labels, labels); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + return err + } + + c.data[StatusMatrix] = mat + return nil +} + +func (c *ClusterSoftware) createValidationMetrics() error { + mat := matrix.New(c.Parent+".ClusterUpdate", validationMatrix, validationMatrix) + exportOptions := node.NewS("export_options") + instanceKeys := exportOptions.NewChildS("instance_keys", "") + instanceKeys.NewChildS("", "status") + instanceKeys.NewChildS("", "update_check") + + mat.SetExportOptions(exportOptions) + + if _, err := mat.NewMetricFloat64(labels, labels); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + return err + } + + c.data[validationMatrix] = mat + return nil +} + +func (c *ClusterSoftware) handleUpdateDetails(updateDetailsJSON gjson.Result, globalLabels map[string]string) { + var ( + clusterUpdateInstance *matrix.Instance + key string + err error + ) + // Purge and reset data + c.data[updateMatrix].PurgeInstances() + c.data[updateMatrix].Reset() + + // Set all global labels + c.data[updateMatrix].SetGlobalLabels(globalLabels) + + for _, updateDetail := range updateDetailsJSON.Array() { + phase := updateDetail.Get("phase").String() + state := updateDetail.Get("state").String() + nodeName := updateDetail.Get("node.name").String() + key = phase + 
state + nodeName + + if clusterUpdateInstance, err = c.data[updateMatrix].NewInstance(key); err != nil { + c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) + continue + } + clusterUpdateInstance.SetLabel("node", nodeName) + clusterUpdateInstance.SetLabel("state", state) + clusterUpdateInstance.SetLabel("phase", phase) + + // populate numeric data + value := 0.0 + if state == "completed" { + value = 1.0 + } + + met := c.data[updateMatrix].GetMetric(labels) + if err := met.SetValueFloat64(clusterUpdateInstance, value); err != nil { + c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) + } else { + c.SLogger.Debug("added value", slog.Float64("value", value)) + } + } +} + +func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, globalLabels map[string]string) { + var ( + clusterStatusInstance *matrix.Instance + key string + err error + ) + // Purge and reset data + c.data[StatusMatrix].PurgeInstances() + c.data[StatusMatrix].Reset() + + // Set all global labels + c.data[StatusMatrix].SetGlobalLabels(globalLabels) + + for _, updateDetail := range statusDetailsJSON.Array() { + name := updateDetail.Get("name").String() + state := updateDetail.Get("state").String() + nodeName := updateDetail.Get("node.name").String() + key = name + state + nodeName + + if clusterStatusInstance, err = c.data[StatusMatrix].NewInstance(key); err != nil { + c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) + continue + } + clusterStatusInstance.SetLabel("node", nodeName) + clusterStatusInstance.SetLabel("state", state) + clusterStatusInstance.SetLabel("name", name) + + // populate numeric data + value := 0.0 + if state == "completed" { + value = 1.0 + } + + met := c.data[StatusMatrix].GetMetric(labels) + if err := met.SetValueFloat64(clusterStatusInstance, value); err != nil { + c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) + 
} else { + c.SLogger.Debug("added value", slog.Float64("value", value)) + } + } +} + +func (c *ClusterSoftware) handleValidationDetails(validationDetailsJSON gjson.Result, globalLabels map[string]string) { + var ( + clusterValidationInstance *matrix.Instance + key string + err error + ) + // Purge and reset data + c.data[validationMatrix].PurgeInstances() + c.data[validationMatrix].Reset() + + // Set all global labels + c.data[validationMatrix].SetGlobalLabels(globalLabels) + + for _, updateDetail := range validationDetailsJSON.Array() { + updateCheck := updateDetail.Get("update_check").String() + status := updateDetail.Get("status").String() + key = updateCheck + status + + if clusterValidationInstance, err = c.data[validationMatrix].NewInstance(key); err != nil { + c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) + continue + } + clusterValidationInstance.SetLabel("update_check", updateCheck) + clusterValidationInstance.SetLabel("status", status) + + // populate numeric data + value := 0.0 + if status == "warning" { + value = 1.0 + } + + met := c.data[validationMatrix].GetMetric(labels) + if err := met.SetValueFloat64(clusterValidationInstance, value); err != nil { + c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) + } else { + c.SLogger.Debug("added value", slog.Float64("value", value)) + } + } +} diff --git a/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go b/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go deleted file mode 100644 index b002ccced..000000000 --- a/cmd/collectors/rest/plugins/clusterupdate/clusterupdate.go +++ /dev/null @@ -1,92 +0,0 @@ -package clusterupdate - -import ( - "github.com/netapp/harvest/v2/cmd/poller/plugin" - "github.com/netapp/harvest/v2/pkg/conf" - "github.com/netapp/harvest/v2/pkg/matrix" - "github.com/netapp/harvest/v2/pkg/slogx" - "github.com/netapp/harvest/v2/pkg/tree/node" - "github.com/netapp/harvest/v2/pkg/util" - 
"github.com/tidwall/gjson" - "log/slog" -) - -type ClusterUpdate struct { - *plugin.AbstractPlugin - data *matrix.Matrix -} - -func New(p *plugin.AbstractPlugin) plugin.Plugin { - return &ClusterUpdate{AbstractPlugin: p} -} - -func (c *ClusterUpdate) Init(conf.Remote) error { - if err := c.InitAbc(); err != nil { - return err - } - - c.data = matrix.New(c.Parent+".ClusterUpdate", "cluster_update", "cluster_update") - exportOptions := node.NewS("export_options") - instanceKeys := exportOptions.NewChildS("instance_keys", "") - instanceKeys.NewChildS("", "phase") - instanceKeys.NewChildS("", "state") - instanceKeys.NewChildS("", "node") - c.data.SetExportOptions(exportOptions) - - if _, err := c.data.NewMetricFloat64("status", "status"); err != nil { - c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", "status")) - return err - } - - return nil -} - -func (c *ClusterUpdate) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *util.Metadata, error) { - var ( - clusterUpdateInstance *matrix.Instance - key string - err error - ) - // Purge and reset data - c.data.PurgeInstances() - c.data.Reset() - - // Set all global labels - data := dataMap[c.Object] - c.data.SetGlobalLabels(data.GetGlobalLabels()) - - for _, instance := range data.GetInstances() { - instance.SetExportable(false) - updateDetails := instance.GetLabel("update_details") - updateDetailsJSON := gjson.Result{Type: gjson.JSON, Raw: "[" + updateDetails + "]"} - for _, updateDetail := range updateDetailsJSON.Array() { - phase := updateDetail.Get("phase").String() - state := updateDetail.Get("state").String() - nodeName := updateDetail.Get("node.name").String() - key = phase + state + nodeName - - if clusterUpdateInstance, err = c.data.NewInstance(key); err != nil { - c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) - continue - } - clusterUpdateInstance.SetLabel("node", nodeName) - clusterUpdateInstance.SetLabel("state", state) - 
clusterUpdateInstance.SetLabel("phase", phase) - - // populate numeric data - value := 0.0 - if state == "completed" { - value = 1.0 - } - - met := c.data.GetMetric("status") - if err := met.SetValueFloat64(clusterUpdateInstance, value); err != nil { - c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) - } else { - c.SLogger.Debug("added value", slog.Float64("value", value)) - } - } - } - - return []*matrix.Matrix{c.data}, nil, nil -} diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index de92ae737..afab98929 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -7,7 +7,7 @@ import ( "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/aggregate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/certificate" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/cluster" - "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/clusterupdate" + "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/clustersoftware" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/disk" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/health" "github.com/netapp/harvest/v2/cmd/collectors/rest/plugins/metroclustercheck" @@ -488,8 +488,8 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin return aggregate.New(abc) case "Cluster": return cluster.New(abc) - case "ClusterUpdate": - return clusterupdate.New(abc) + case "ClusterSoftware": + return clustersoftware.New(abc) case "Disk": return disk.New(abc) case "Health": diff --git a/conf/rest/9.6.0/clustersoftware.yaml b/conf/rest/9.6.0/clustersoftware.yaml new file mode 100644 index 000000000..6760466d5 --- /dev/null +++ b/conf/rest/9.6.0/clustersoftware.yaml @@ -0,0 +1,14 @@ + +name: ClusterSoftware +query: api/cluster/software +object: cluster_software + +counters: + - ^status_details => status_details + - ^update_details => update_details + - ^validation_results => validation_results + 
+plugins: + - ClusterSoftware + + diff --git a/conf/rest/9.6.0/clusterupdate.yaml b/conf/rest/9.6.0/clusterupdate.yaml deleted file mode 100644 index 386e27351..000000000 --- a/conf/rest/9.6.0/clusterupdate.yaml +++ /dev/null @@ -1,12 +0,0 @@ - -name: ClusterUpdate -query: api/cluster/software -object: cluster_update - -counters: - - ^update_details => update_details - -plugins: - - ClusterUpdate - - diff --git a/conf/rest/default.yaml b/conf/rest/default.yaml index 27b3c31a1..2fa210fd6 100644 --- a/conf/rest/default.yaml +++ b/conf/rest/default.yaml @@ -16,7 +16,7 @@ objects: # CIFSShare: cifs_share.yaml CloudTarget: cloud_target.yaml ClusterPeer: clusterpeer.yaml - ClusterUpdate: clusterupdate.yaml + ClusterSoftware: clustersoftware.yaml Disk: disk.yaml EmsDestination: ems_destination.yaml # ExportRule: exports.yaml From 0498d39a6123d3b224bfb3e42f090ad20eefb968 Mon Sep 17 00:00:00 2001 From: hardikl Date: Thu, 21 Nov 2024 18:22:26 +0530 Subject: [PATCH 05/13] feat: handled review comments --- .../clustersoftware/clustersoftware.go | 55 +++++++++---------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go index 54c9a0d61..26d6d88a0 100644 --- a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go +++ b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go @@ -7,14 +7,14 @@ import ( "github.com/netapp/harvest/v2/pkg/slogx" "github.com/netapp/harvest/v2/pkg/tree/node" "github.com/netapp/harvest/v2/pkg/util" - "github.com/tidwall/gjson" + "github.com/netapp/harvest/v2/third_party/tidwall/gjson" "log/slog" ) -const updateMatrix = "cluster_software_update" -const StatusMatrix = "cluster_software_status" -const validationMatrix = "cluster_software_validation" -const labels = "labels" +const clusterSoftware = "cluster_software" +const updateMatrix = "update" +const StatusMatrix = "status" +const validationMatrix = 
"validation" type ClusterSoftware struct { *plugin.AbstractPlugin @@ -45,10 +45,9 @@ func (c *ClusterSoftware) Init(conf.Remote) error { } func (c *ClusterSoftware) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matrix, *util.Metadata, error) { - data := dataMap[c.Object] - globalLabels := data.GetGlobalLabels() + globalLabels := dataMap[c.Object].GetGlobalLabels() - for _, instance := range data.GetInstances() { + for _, instance := range dataMap[c.Object].GetInstances() { instance.SetExportable(false) // generate update details metrics updateDetails := instance.GetLabel("update_details") @@ -75,7 +74,7 @@ func (c *ClusterSoftware) Run(dataMap map[string]*matrix.Matrix) ([]*matrix.Matr } func (c *ClusterSoftware) createUpdateMetrics() error { - mat := matrix.New(c.Parent+".ClusterSoftware", updateMatrix, updateMatrix) + mat := matrix.New(c.Parent+"."+updateMatrix, clusterSoftware, clusterSoftware) exportOptions := node.NewS("export_options") instanceKeys := exportOptions.NewChildS("instance_keys", "") instanceKeys.NewChildS("", "phase") @@ -84,8 +83,8 @@ func (c *ClusterSoftware) createUpdateMetrics() error { mat.SetExportOptions(exportOptions) - if _, err := mat.NewMetricFloat64(labels, labels); err != nil { - c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + if _, err := mat.NewMetricFloat64(updateMatrix); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", updateMatrix)) return err } @@ -94,7 +93,7 @@ func (c *ClusterSoftware) createUpdateMetrics() error { } func (c *ClusterSoftware) createStatusMetrics() error { - mat := matrix.New(c.Parent+".ClusterUpdate", StatusMatrix, StatusMatrix) + mat := matrix.New(c.Parent+"."+StatusMatrix, clusterSoftware, clusterSoftware) exportOptions := node.NewS("export_options") instanceKeys := exportOptions.NewChildS("instance_keys", "") instanceKeys.NewChildS("", "state") @@ -103,8 +102,8 @@ func (c *ClusterSoftware) createStatusMetrics() 
error { mat.SetExportOptions(exportOptions) - if _, err := mat.NewMetricFloat64(labels, labels); err != nil { - c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + if _, err := mat.NewMetricFloat64(StatusMatrix); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", StatusMatrix)) return err } @@ -113,7 +112,7 @@ func (c *ClusterSoftware) createStatusMetrics() error { } func (c *ClusterSoftware) createValidationMetrics() error { - mat := matrix.New(c.Parent+".ClusterUpdate", validationMatrix, validationMatrix) + mat := matrix.New(c.Parent+"."+validationMatrix, clusterSoftware, clusterSoftware) exportOptions := node.NewS("export_options") instanceKeys := exportOptions.NewChildS("instance_keys", "") instanceKeys.NewChildS("", "status") @@ -121,8 +120,8 @@ func (c *ClusterSoftware) createValidationMetrics() error { mat.SetExportOptions(exportOptions) - if _, err := mat.NewMetricFloat64(labels, labels); err != nil { - c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", labels)) + if _, err := mat.NewMetricFloat64(validationMatrix); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", validationMatrix)) return err } @@ -144,9 +143,9 @@ func (c *ClusterSoftware) handleUpdateDetails(updateDetailsJSON gjson.Result, gl c.data[updateMatrix].SetGlobalLabels(globalLabels) for _, updateDetail := range updateDetailsJSON.Array() { - phase := updateDetail.Get("phase").String() - state := updateDetail.Get("state").String() - nodeName := updateDetail.Get("node.name").String() + phase := updateDetail.Get("phase").ClonedString() + state := updateDetail.Get("state").ClonedString() + nodeName := updateDetail.Get("node.name").ClonedString() key = phase + state + nodeName if clusterUpdateInstance, err = c.data[updateMatrix].NewInstance(key); err != nil { @@ -163,7 +162,7 @@ func (c *ClusterSoftware) handleUpdateDetails(updateDetailsJSON 
gjson.Result, gl value = 1.0 } - met := c.data[updateMatrix].GetMetric(labels) + met := c.data[updateMatrix].GetMetric(updateMatrix) if err := met.SetValueFloat64(clusterUpdateInstance, value); err != nil { c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) } else { @@ -186,9 +185,9 @@ func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, gl c.data[StatusMatrix].SetGlobalLabels(globalLabels) for _, updateDetail := range statusDetailsJSON.Array() { - name := updateDetail.Get("name").String() - state := updateDetail.Get("state").String() - nodeName := updateDetail.Get("node.name").String() + name := updateDetail.Get("name").ClonedString() + state := updateDetail.Get("state").ClonedString() + nodeName := updateDetail.Get("node.name").ClonedString() key = name + state + nodeName if clusterStatusInstance, err = c.data[StatusMatrix].NewInstance(key); err != nil { @@ -205,7 +204,7 @@ func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, gl value = 1.0 } - met := c.data[StatusMatrix].GetMetric(labels) + met := c.data[StatusMatrix].GetMetric(StatusMatrix) if err := met.SetValueFloat64(clusterStatusInstance, value); err != nil { c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) } else { @@ -228,8 +227,8 @@ func (c *ClusterSoftware) handleValidationDetails(validationDetailsJSON gjson.Re c.data[validationMatrix].SetGlobalLabels(globalLabels) for _, updateDetail := range validationDetailsJSON.Array() { - updateCheck := updateDetail.Get("update_check").String() - status := updateDetail.Get("status").String() + updateCheck := updateDetail.Get("update_check").ClonedString() + status := updateDetail.Get("status").ClonedString() key = updateCheck + status if clusterValidationInstance, err = c.data[validationMatrix].NewInstance(key); err != nil { @@ -245,7 +244,7 @@ func (c *ClusterSoftware) handleValidationDetails(validationDetailsJSON gjson.Re value = 1.0 } - met 
:= c.data[validationMatrix].GetMetric(labels) + met := c.data[validationMatrix].GetMetric(validationMatrix) if err := met.SetValueFloat64(clusterValidationInstance, value); err != nil { c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) } else { From 166017c5bc405460862ff42bc0a467a2698b39d6 Mon Sep 17 00:00:00 2001 From: hardikl Date: Fri, 22 Nov 2024 18:54:42 +0530 Subject: [PATCH 06/13] feat: handled review comments --- .../clustersoftware/clustersoftware.go | 28 +- grafana/dashboards/cmode/cluster.json | 483 ++++++++++++++++++ 2 files changed, 498 insertions(+), 13 deletions(-) diff --git a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go index 26d6d88a0..321985cf3 100644 --- a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go +++ b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go @@ -11,10 +11,12 @@ import ( "log/slog" ) -const clusterSoftware = "cluster_software" -const updateMatrix = "update" -const StatusMatrix = "status" -const validationMatrix = "validation" +const ( + clusterSoftware = "cluster_software" + updateMatrix = "update" + statusMatrix = "status" + validationMatrix = "validation" +) type ClusterSoftware struct { *plugin.AbstractPlugin @@ -93,7 +95,7 @@ func (c *ClusterSoftware) createUpdateMetrics() error { } func (c *ClusterSoftware) createStatusMetrics() error { - mat := matrix.New(c.Parent+"."+StatusMatrix, clusterSoftware, clusterSoftware) + mat := matrix.New(c.Parent+"."+statusMatrix, clusterSoftware, clusterSoftware) exportOptions := node.NewS("export_options") instanceKeys := exportOptions.NewChildS("instance_keys", "") instanceKeys.NewChildS("", "state") @@ -102,12 +104,12 @@ func (c *ClusterSoftware) createStatusMetrics() error { mat.SetExportOptions(exportOptions) - if _, err := mat.NewMetricFloat64(StatusMatrix); err != nil { - c.SLogger.Error("Failed to create metric", slogx.Err(err), 
slog.String("metric", StatusMatrix)) + if _, err := mat.NewMetricFloat64(statusMatrix); err != nil { + c.SLogger.Error("Failed to create metric", slogx.Err(err), slog.String("metric", statusMatrix)) return err } - c.data[StatusMatrix] = mat + c.data[statusMatrix] = mat return nil } @@ -178,11 +180,11 @@ func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, gl err error ) // Purge and reset data - c.data[StatusMatrix].PurgeInstances() - c.data[StatusMatrix].Reset() + c.data[statusMatrix].PurgeInstances() + c.data[statusMatrix].Reset() // Set all global labels - c.data[StatusMatrix].SetGlobalLabels(globalLabels) + c.data[statusMatrix].SetGlobalLabels(globalLabels) for _, updateDetail := range statusDetailsJSON.Array() { name := updateDetail.Get("name").ClonedString() @@ -190,7 +192,7 @@ func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, gl nodeName := updateDetail.Get("node.name").ClonedString() key = name + state + nodeName - if clusterStatusInstance, err = c.data[StatusMatrix].NewInstance(key); err != nil { + if clusterStatusInstance, err = c.data[statusMatrix].NewInstance(key); err != nil { c.SLogger.Error("Failed to create instance", slogx.Err(err), slog.String("key", key)) continue } @@ -204,7 +206,7 @@ func (c *ClusterSoftware) handleStatusDetails(statusDetailsJSON gjson.Result, gl value = 1.0 } - met := c.data[StatusMatrix].GetMetric(StatusMatrix) + met := c.data[statusMatrix].GetMetric(statusMatrix) if err := met.SetValueFloat64(clusterStatusInstance, value); err != nil { c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) } else { diff --git a/grafana/dashboards/cmode/cluster.json b/grafana/dashboards/cmode/cluster.json index 45ee8a143..7a7ce9112 100644 --- a/grafana/dashboards/cmode/cluster.json +++ b/grafana/dashboards/cmode/cluster.json @@ -4401,6 +4401,489 @@ ], "title": "SVM Performance", "type": "row" + }, + { + "collapsed": true, + "datasource": "${DS_PROMETHEUS}", + 
"gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 482, + "panels": [ + { + "datasource": "${DS_PROMETHEUS}", + "description": "Software update details.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": true + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(217, 91, 91, 0.74)", + "value": null + }, + { + "color": "rgb(101, 201, 87)", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "node" + }, + "properties": [ + { + "id": "displayName", + "value": "Node" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-node/ontap-node?orgId=1&${Datacenter:queryparam}&${Cluster:queryparam}&${__url_time_range}&var-Node=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-cluster/ontap-cluster?orgId=1&${Datacenter:queryparam}&${__url_time_range}&var-Cluster=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "datacenter" + }, + "properties": [ + { + "id": "displayName", + "value": "Datacenter" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-datacenter/ontap-datacenter?orgId=1&${__url_time_range}&var-Datacenter=${__value.raw}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 39 + }, + "id": 528, + "interval": "", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "cluster_software_update{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + 
"format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster Software Update", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "state", + "cluster", + "phase", + "node", + "datacenter" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "cluster": 1, + "datacenter": 0, + "node": 2, + "phase": 3, + "state": 4 + }, + "renameByName": { + "phase": "Update Phase", + "state": "State" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Software status details.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": true + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(217, 91, 91, 0.74)", + "value": null + }, + { + "color": "rgb(101, 201, 87)", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "node" + }, + "properties": [ + { + "id": "displayName", + "value": "Node" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-node/ontap-node?orgId=1&${Datacenter:queryparam}&${Cluster:queryparam}&${__url_time_range}&var-Node=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-cluster/ontap-cluster?orgId=1&${Datacenter:queryparam}&${__url_time_range}&var-Cluster=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "datacenter" + }, + "properties": [ + { + "id": "displayName", + "value": "Datacenter" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + 
"url": "/d/cdot-datacenter/ontap-datacenter?orgId=1&${__url_time_range}&var-Datacenter=${__value.raw}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 529, + "interval": "", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "cluster_software_status{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster Software Status", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "state", + "cluster", + "name", + "node", + "datacenter" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "cluster": 1, + "datacenter": 0, + "name": 3, + "node": 2, + "state": 4 + }, + "renameByName": { + "name": "Job Name", + "state": "State" + } + } + } + ], + "type": "table" + }, + { + "datasource": "${DS_PROMETHEUS}", + "description": "Software validation details.", + "fieldConfig": { + "defaults": { + "custom": { + "align": "left", + "displayMode": "auto", + "filterable": true + }, + "decimals": 0, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "rgba(217, 91, 91, 0.74)", + "value": null + }, + { + "color": "rgb(101, 201, 87)", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "cluster" + }, + "properties": [ + { + "id": "displayName", + "value": "Cluster" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-cluster/ontap-cluster?orgId=1&${Datacenter:queryparam}&${__url_time_range}&var-Cluster=${__value.raw}" + } + ] + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "datacenter" + 
}, + "properties": [ + { + "id": "displayName", + "value": "Datacenter" + }, + { + "id": "links", + "value": [ + { + "targetBlank": true, + "title": "", + "url": "/d/cdot-datacenter/ontap-datacenter?orgId=1&${__url_time_range}&var-Datacenter=${__value.raw}" + } + ] + } + ] + } + ] + }, + "gridPos": { + "h": 7, + "w": 24, + "x": 0, + "y": 53 + }, + "id": 530, + "interval": "", + "options": { + "footer": { + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [] + }, + "pluginVersion": "8.1.8", + "targets": [ + { + "exemplar": false, + "expr": "cluster_software_validation{datacenter=~\"$Datacenter\",cluster=~\"$Cluster\"}", + "format": "table", + "instant": true, + "interval": "", + "intervalFactor": 1, + "legendFormat": "", + "refId": "A" + } + ], + "title": "Cluster Software Validation", + "transformations": [ + { + "id": "filterFieldsByName", + "options": { + "include": { + "names": [ + "status", + "cluster", + "update_check", + "datacenter" + ] + } + } + }, + { + "id": "organize", + "options": { + "excludeByName": {}, + "indexByName": { + "cluster": 1, + "datacenter": 0, + "status": 3, + "update_check": 2 + }, + "renameByName": { + "status": "Status", + "update_check": "Validation Check Name" + } + } + } + ], + "type": "table" + } + ], + "title": "Health", + "type": "row" } ], "refresh": "", From 238ce666e90e5f0c5318e294b814fd77b29201bc Mon Sep 17 00:00:00 2001 From: hardikl Date: Fri, 22 Nov 2024 18:56:36 +0530 Subject: [PATCH 07/13] feat: handled review comments --- grafana/dashboards/cmode/cluster.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grafana/dashboards/cmode/cluster.json b/grafana/dashboards/cmode/cluster.json index 7a7ce9112..77f1b8674 100644 --- a/grafana/dashboards/cmode/cluster.json +++ b/grafana/dashboards/cmode/cluster.json @@ -4882,7 +4882,7 @@ "type": "table" } ], - "title": "Health", + "title": "Software", "type": "row" } ], From fb33b2420fbe4b5ff1a6d63f9fc3bcea4fc595a3 Mon 
Sep 17 00:00:00 2001 From: hardikl Date: Fri, 22 Nov 2024 21:04:33 +0530 Subject: [PATCH 08/13] feat: update time field --- .../clustersoftware/clustersoftware.go | 12 ++++++--- grafana/dashboards/cmode/cluster.json | 25 +++++++++++++++---- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go index 321985cf3..c18d96e52 100644 --- a/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go +++ b/cmd/collectors/rest/plugins/clustersoftware/clustersoftware.go @@ -82,6 +82,7 @@ func (c *ClusterSoftware) createUpdateMetrics() error { instanceKeys.NewChildS("", "phase") instanceKeys.NewChildS("", "state") instanceKeys.NewChildS("", "node") + instanceKeys.NewChildS("", "elapsed_duration") mat.SetExportOptions(exportOptions) @@ -147,6 +148,7 @@ func (c *ClusterSoftware) handleUpdateDetails(updateDetailsJSON gjson.Result, gl for _, updateDetail := range updateDetailsJSON.Array() { phase := updateDetail.Get("phase").ClonedString() state := updateDetail.Get("state").ClonedString() + elapsedDuration := updateDetail.Get("elapsed_duration").ClonedString() nodeName := updateDetail.Get("node.name").ClonedString() key = phase + state + nodeName @@ -157,6 +159,7 @@ func (c *ClusterSoftware) handleUpdateDetails(updateDetailsJSON gjson.Result, gl clusterUpdateInstance.SetLabel("node", nodeName) clusterUpdateInstance.SetLabel("state", state) clusterUpdateInstance.SetLabel("phase", phase) + clusterUpdateInstance.SetLabel("elapsed_duration", elapsedDuration) // populate numeric data value := 0.0 @@ -240,12 +243,13 @@ func (c *ClusterSoftware) handleValidationDetails(validationDetailsJSON gjson.Re clusterValidationInstance.SetLabel("update_check", updateCheck) clusterValidationInstance.SetLabel("status", status) - // populate numeric data - value := 0.0 - if status == "warning" { - value = 1.0 + // ignore all the validation result which are not in 
warning status + if status != "warning" { + continue } + // populate numeric data + value := 1.0 met := c.data[validationMatrix].GetMetric(validationMatrix) if err := met.SetValueFloat64(clusterValidationInstance, value); err != nil { c.SLogger.Error("Failed to parse value", slogx.Err(err), slog.Float64("value", value)) diff --git a/grafana/dashboards/cmode/cluster.json b/grafana/dashboards/cmode/cluster.json index 77f1b8674..4154737f9 100644 --- a/grafana/dashboards/cmode/cluster.json +++ b/grafana/dashboards/cmode/cluster.json @@ -4506,6 +4506,18 @@ ] } ] + }, + { + "matcher": { + "id": "byName", + "options": "Approximate time elapsed" + }, + "properties": [ + { + "id": "unit", + "value": "s" + } + ] } ] }, @@ -4552,7 +4564,8 @@ "cluster", "phase", "node", - "datacenter" + "datacenter", + "elapsed_duration" ] } } @@ -4564,13 +4577,15 @@ "indexByName": { "cluster": 1, "datacenter": 0, + "elapsed_duration": 5, "node": 2, "phase": 3, "state": 4 }, "renameByName": { - "phase": "Update Phase", - "state": "State" + "elapsed_duration": "Approximate time elapsed", + "phase": "Phase", + "state": "Status" } } } @@ -4734,7 +4749,7 @@ }, "renameByName": { "name": "Job Name", - "state": "State" + "state": "Status" } } } @@ -4874,7 +4889,7 @@ }, "renameByName": { "status": "Status", - "update_check": "Validation Check Name" + "update_check": "Pre-update Check" } } } From f3436301755e403592efab887f02d76723030f00 Mon Sep 17 00:00:00 2001 From: hardikl Date: Mon, 25 Nov 2024 14:18:29 +0530 Subject: [PATCH 09/13] feat: adding color to status column --- grafana/dashboards/cmode/cluster.json | 172 ++++++++++++++++++++++++++ 1 file changed, 172 insertions(+) diff --git a/grafana/dashboards/cmode/cluster.json b/grafana/dashboards/cmode/cluster.json index 4154737f9..4c8c32106 100644 --- a/grafana/dashboards/cmode/cluster.json +++ b/grafana/dashboards/cmode/cluster.json @@ -4518,6 +4518,75 @@ "value": "s" } ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + 
"properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "cancel_pending": { + "color": "orange", + "index": 8, + "text": "Cancel Pending" + }, + "canceled": { + "color": "red", + "index": 5, + "text": "Canceled" + }, + "completed": { + "color": "green", + "index": 4, + "text": "Completed" + }, + "failed": { + "color": "red", + "index": 6, + "text": "Failed" + }, + "in_progress": { + "color": "yellow", + "index": 0, + "text": "In Progress" + }, + "pause_pending": { + "color": "orange", + "index": 7, + "text": "Pause Pending" + }, + "paused_by_user": { + "color": "orange", + "index": 2, + "text": "Paused by User" + }, + "paused_on_error": { + "color": "orange", + "index": 3, + "text": "Paused on Error" + }, + "waiting": { + "color": "yellow", + "index": 1, + "text": "Waiting" + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] } ] }, @@ -4685,6 +4754,75 @@ ] } ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "cancel_pending": { + "color": "orange", + "index": 8, + "text": "Cancel Pending" + }, + "canceled": { + "color": "red", + "index": 5, + "text": "Canceled" + }, + "completed": { + "color": "green", + "index": 4, + "text": "Completed" + }, + "failed": { + "color": "red", + "index": 6, + "text": "Failed" + }, + "in_progress": { + "color": "yellow", + "index": 0, + "text": "In Progress" + }, + "pause_pending": { + "color": "orange", + "index": 7, + "text": "Pause Pending" + }, + "paused_by_user": { + "color": "orange", + "index": 2, + "text": "Paused by User" + }, + "paused_on_error": { + "color": "orange", + "index": 3, + "text": "Paused on Error" + }, + "waiting": { + "color": "yellow", + "index": 1, + "text": "Waiting" + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] } ] }, @@ -4827,6 +4965,40 @@ 
] } ] + }, + { + "matcher": { + "id": "byName", + "options": "Status" + }, + "properties": [ + { + "id": "mappings", + "value": [ + { + "options": { + "error": { + "color": "red", + "index": 1, + "text": "Error" + }, + "warning": { + "color": "yellow", + "index": 0, + "text": "Warning" + } + }, + "type": "value" + } + ] + }, + { + "id": "custom.cellOptions", + "value": { + "type": "color-background" + } + } + ] } ] }, From 496fcfb131daac6e9e69217ec3c64f86adbeb5b5 Mon Sep 17 00:00:00 2001 From: hardikl Date: Mon, 25 Nov 2024 14:28:15 +0530 Subject: [PATCH 10/13] feat: minor change --- grafana/dashboards/cmode/cluster.json | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/grafana/dashboards/cmode/cluster.json b/grafana/dashboards/cmode/cluster.json index 4c8c32106..6f1ffb3df 100644 --- a/grafana/dashboards/cmode/cluster.json +++ b/grafana/dashboards/cmode/cluster.json @@ -4581,10 +4581,8 @@ ] }, { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } + "id": "custom.displayMode", + "value": "color-background" } ] } @@ -4817,10 +4815,8 @@ ] }, { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } + "id": "custom.displayMode", + "value": "color-background" } ] } @@ -4993,10 +4989,8 @@ ] }, { - "id": "custom.cellOptions", - "value": { - "type": "color-background" - } + "id": "custom.displayMode", + "value": "color-background" } ] } From e6b5be4aa9a103095ec95c3e2366af29ee11c889 Mon Sep 17 00:00:00 2001 From: hardikl Date: Mon, 25 Nov 2024 16:46:46 +0530 Subject: [PATCH 11/13] feat: ignore counters in zapi ci --- integration/test/dashboard_json_test.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/integration/test/dashboard_json_test.go b/integration/test/dashboard_json_test.go index 855a9d218..80ebd3dd7 100644 --- a/integration/test/dashboard_json_test.go +++ b/integration/test/dashboard_json_test.go @@ -50,6 +50,9 @@ var zapiCounterMap = map[string]struct{}{ 
"aggr_object_store_logical_used": {}, "aggr_object_store_physical_used": {}, "fru_status": {}, + "cluster_software_update": {}, + "cluster_software_status": {}, + "cluster_software_validation": {}, } // restCounterMap are additional counters, above and beyond the ones from counterMap, which should be excluded from Rest From b9663557d28276d92baf52c2228c6d21593119fe Mon Sep 17 00:00:00 2001 From: hardikl Date: Mon, 25 Nov 2024 18:51:05 +0530 Subject: [PATCH 12/13] feat: ignore counters in ci and add docs --- cmd/tools/generate/counter.go | 1 - cmd/tools/generate/counter.yaml | 41 +++++++++++++++++++++++++ integration/test/dashboard_json_test.go | 5 ++- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/cmd/tools/generate/counter.go b/cmd/tools/generate/counter.go index 48fe2fa9e..5a3a7ac7f 100644 --- a/cmd/tools/generate/counter.go +++ b/cmd/tools/generate/counter.go @@ -173,7 +173,6 @@ var ( "_labels", "volume_arw_status", "ALERTS", - "_tags", } // Exclude extra metrics for ZAPI diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index c01822fe7..4d0fc725d 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -1988,3 +1988,44 @@ counters: Endpoint: NA ONTAPCounter: Harvest generated Template: conf/rest/9.12.0/fru.yaml + + - Name: cluster_tags + Description: This metric display tags at cluster level. + APIs: + - API: REST + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/rest/9.12.0/status.yaml + + - Name: volume_tags + Description: This metric display tags at volume level. + APIs: + - API: REST + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/rest/9.12.0/volume.yaml + + + - Name: cluster_software_update + Description: This metric displays the software update phase with its status. 
+ APIs: + - API: REST + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/rest/9.6.0/clustersoftware.yaml + + - Name: cluster_software_status + Description: This metric displays the software job with its status. + APIs: + - API: REST + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/rest/9.6.0/clustersoftware.yaml + + - Name: cluster_software_validation + Description: This metric displays the software pre validation checks with its status. + APIs: + - API: REST + Endpoint: NA + ONTAPCounter: Harvest generated + Template: conf/rest/9.6.0/clustersoftware.yaml diff --git a/integration/test/dashboard_json_test.go b/integration/test/dashboard_json_test.go index 80ebd3dd7..88e0ffb61 100644 --- a/integration/test/dashboard_json_test.go +++ b/integration/test/dashboard_json_test.go @@ -50,9 +50,6 @@ var zapiCounterMap = map[string]struct{}{ "aggr_object_store_logical_used": {}, "aggr_object_store_physical_used": {}, "fru_status": {}, - "cluster_software_update": {}, - "cluster_software_status": {}, - "cluster_software_validation": {}, } // restCounterMap are additional counters, above and beyond the ones from counterMap, which should be excluded from Rest @@ -113,6 +110,8 @@ var excludeCounters = []string{ "svm_write_total", "volume_top_clients_", "volume_top_files_", + // Based on the cluster upgrade, these metrics existence would be changed. 
+ "cluster_software_", } var flakyCounters = []string{ From 3bb6c6735d9de44bab1793f445f176290451a0e1 Mon Sep 17 00:00:00 2001 From: hardikl Date: Mon, 25 Nov 2024 18:56:38 +0530 Subject: [PATCH 13/13] feat: handled review comments --- cmd/tools/generate/counter.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cmd/tools/generate/counter.yaml b/cmd/tools/generate/counter.yaml index 4d0fc725d..e59691133 100644 --- a/cmd/tools/generate/counter.yaml +++ b/cmd/tools/generate/counter.yaml @@ -1990,7 +1990,7 @@ counters: Template: conf/rest/9.12.0/fru.yaml - Name: cluster_tags - Description: This metric display tags at cluster level. + Description: Displays tags at the cluster level. APIs: - API: REST Endpoint: NA @@ -1998,7 +1998,7 @@ counters: Template: conf/rest/9.12.0/status.yaml - Name: volume_tags - Description: This metric display tags at volume level. + Description: Displays tags at the volume level. APIs: - API: REST Endpoint: NA @@ -2007,7 +2007,7 @@ counters: - Name: cluster_software_update - Description: This metric displays the software update phase with its status. + Description: Displays the software update phase with its status. APIs: - API: REST Endpoint: NA @@ -2015,7 +2015,7 @@ counters: Template: conf/rest/9.6.0/clustersoftware.yaml - Name: cluster_software_status - Description: This metric displays the software job with its status. + Description: Displays the software job with its status. APIs: - API: REST Endpoint: NA @@ -2023,7 +2023,7 @@ counters: Template: conf/rest/9.6.0/clustersoftware.yaml - Name: cluster_software_validation - Description: This metric displays the software pre validation checks with its status. + Description: Displays the software pre-validation checks with their status. APIs: - API: REST Endpoint: NA