From aa822f3adb5f130618bbb6f95ee127f3c021d08e Mon Sep 17 00:00:00 2001 From: Rahul Date: Fri, 2 Aug 2024 23:29:46 +0530 Subject: [PATCH] feat: keyperfmetrics collector infrastructure (#3078) * feat: keyperfmetrics collector infrastructure * feat: keyperfmetrics collector infrastructure * feat: keyperfmetrics collector infrastructure (#3079) * feat: keyperfmetrics collector infrastructure * feat: keyperfmetrics collector infrastructure * feat: change to use KeyPerf name --------- Co-authored-by: Chris Grindstaff --- cmd/collectors/keyperf/keyperf.go | 457 ++++++++++++++++++ cmd/collectors/keyperf/keyperf_test.go | 213 ++++++++ .../testdata/conf/keyperf/9.15.0/volume.yaml | 32 ++ cmd/collectors/keyperf/testdata/config.yml | 14 + .../testdata/missingStats/volume-poll-1.json | 28 ++ .../testdata/missingStats/volume-poll-2.json | 28 ++ .../partialAggregation/volume-poll-1.json | 212 ++++++++ .../partialAggregation/volume-poll-2.json | 212 ++++++++ .../partialAggregation/volume-poll-3.json | 212 ++++++++ .../volume-poll-partial-2.json | 212 ++++++++ .../volume-poll-partial.json | 212 ++++++++ .../keyperf/testdata/volume-poll-1.json | 212 ++++++++ .../keyperf/testdata/volume-poll-2.json | 212 ++++++++ cmd/collectors/rest/rest.go | 51 +- cmd/collectors/rest/rest_test.go | 2 +- cmd/collectors/restperf/restperf.go | 15 +- cmd/collectors/zapiperf/zapiperf.go | 15 +- cmd/poller/poller.go | 1 + conf/keyperf/9.15.0/volume.yaml | 46 ++ conf/keyperf/default.yaml | 9 + pkg/matrix/matrix.go | 4 +- pkg/matrix/metric_test.go | 2 +- pkg/util/util.go | 1 + 23 files changed, 2364 insertions(+), 38 deletions(-) create mode 100644 cmd/collectors/keyperf/keyperf.go create mode 100644 cmd/collectors/keyperf/keyperf_test.go create mode 100644 cmd/collectors/keyperf/testdata/conf/keyperf/9.15.0/volume.yaml create mode 100644 cmd/collectors/keyperf/testdata/config.yml create mode 100644 cmd/collectors/keyperf/testdata/missingStats/volume-poll-1.json create mode 100644 cmd/collectors/keyperf/testdata/missingStats/volume-poll-2.json create mode 100644 cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-1.json create mode 100644 cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-2.json create mode 100644 cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-3.json create mode 100644 cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial-2.json create mode 100644 cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial.json create mode 100644 cmd/collectors/keyperf/testdata/volume-poll-1.json create mode 100644 cmd/collectors/keyperf/testdata/volume-poll-2.json create mode 100644 conf/keyperf/9.15.0/volume.yaml create mode 100644 conf/keyperf/default.yaml diff --git a/cmd/collectors/keyperf/keyperf.go b/cmd/collectors/keyperf/keyperf.go new file mode 100644 index 000000000..f9a5d48d6 --- /dev/null +++ b/cmd/collectors/keyperf/keyperf.go @@ -0,0 +1,457 @@ +package keyperf + +import ( + "fmt" + "github.com/netapp/harvest/v2/cmd/collectors/rest" + "github.com/netapp/harvest/v2/cmd/poller/collector" + "github.com/netapp/harvest/v2/cmd/poller/plugin" + "github.com/netapp/harvest/v2/pkg/errs" + "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/tidwall/gjson" + "strconv" + "strings" + "time" +) + +const ( + latencyIoReqd = 10 + timestampMetricName = "statistics.timestamp" +) + +type KeyPerf struct { + *rest.Rest // provides: AbstractCollector, Client, Object, Query, TemplateFn, TemplateType + perfProp *perfProp +} + +type counter struct { + name string + counterType string + unit string + denominator string +} + +type perfProp struct { + isCacheEmpty bool + counterInfo map[string]*counter + latencyIoReqd int +} + +func init() { + plugin.RegisterModule(&KeyPerf{}) +} + +func (kp *KeyPerf) HarvestModule() plugin.ModuleInfo { + return plugin.ModuleInfo{ + ID: "harvest.collector.keyperf", + New: func() plugin.Module { return new(KeyPerf) }, + } +} + +func (kp *KeyPerf) Init(a *collector.AbstractCollector) error { + + var err error + + kp.Rest = &rest.Rest{AbstractCollector: a} + + kp.perfProp = &perfProp{} + + kp.InitProp() + + kp.perfProp.counterInfo = make(map[string]*counter) + + if err := kp.InitClient(); err != nil { + return err + } + + if kp.Prop.TemplatePath, err = kp.LoadTemplate(); err != nil { + return err + } + + kp.InitVars(a.Params) + + if err := kp.InitEndPoints(); err != nil { + return err + } + + if err := collector.Init(kp); err != nil { + return err + } + + if err := kp.InitCache(); err != nil { + return err + } + + if err := kp.InitMatrix(); err != nil { + return err + } + + kp.buildCounters() + + kp.Logger.Debug(). + Int("numMetrics", len(kp.Prop.Metrics)). + Str("timeout", kp.Client.Timeout.String()). + Msg("initialized cache") + return nil +} + +func (kp *KeyPerf) InitMatrix() error { + mat := kp.Matrix[kp.Object] + // init perf properties + kp.perfProp.latencyIoReqd = kp.loadParamInt("latency_io_reqd", latencyIoReqd) + kp.perfProp.isCacheEmpty = true + // overwrite from abstract collector + mat.Object = kp.Prop.Object + // Add system (cluster) name + mat.SetGlobalLabel("cluster", kp.Client.Cluster().Name) + if kp.Params.HasChildS("labels") { + for _, l := range kp.Params.GetChildS("labels").GetChildren() { + mat.SetGlobalLabel(l.GetNameS(), l.GetContentS()) + } + } + + // Add metadata metric for skips/numPartials + _, _ = kp.Metadata.NewMetricUint64("skips") + _, _ = kp.Metadata.NewMetricUint64("numPartials") + return nil +} + +// load an int parameter or use defaultValue +func (kp *KeyPerf) loadParamInt(name string, defaultValue int) int { + + var ( + x string + n int + e error + ) + + if x = kp.Params.GetChildContentS(name); x != "" { + if n, e = strconv.Atoi(x); e == nil { + kp.Logger.Debug().Str("name", name).Int("n", n).Send() + return n + } + kp.Logger.Warn().Str("parameter", name).Str("x", x).Msg("invalid parameter") + } + + kp.Logger.Debug().Str("name", name).Str("defaultValue", strconv.Itoa(defaultValue)).Msg("using values") + return defaultValue +} + +func (kp *KeyPerf) buildCounters() { + for k := range kp.Prop.Metrics { + if _, exists := kp.perfProp.counterInfo[k]; !exists { + var ctr *counter + + switch { + case strings.Contains(k, "latency"): + ctr = &counter{ + name: k, + counterType: "average", + unit: "microsec", + denominator: strings.Replace(k, "latency", "iops", 1), + } + case strings.Contains(k, "iops"): + ctr = &counter{ + name: k, + counterType: "rate", + unit: "per_sec", + } + case strings.Contains(k, "throughput"): + ctr = &counter{ + name: k, + counterType: "rate", + unit: "b_per_sec", + } + case strings.Contains(k, timestampMetricName): + ctr = &counter{ + name: k, + counterType: "delta", + unit: "sec", + } + } + + if ctr != nil { + kp.perfProp.counterInfo[k] = ctr + } + } + } +} + +func (kp *KeyPerf) PollData() (map[string]*matrix.Matrix, error) { + var ( + err error + perfRecords []gjson.Result + startTime time.Time + ) + startTime = time.Now() + kp.Client.Metadata.Reset() + + href := kp.Prop.Href + kp.Logger.Debug().Str("href", href).Send() + if href == "" { + return nil, errs.New(errs.ErrConfig, "empty url") + } + + perfRecords, err = kp.GetRestData(href) + if err != nil { + return nil, fmt.Errorf("failed to fetch href=%s %w", href, err) + } + + return kp.pollData(startTime, perfRecords, func(e *rest.EndPoint) ([]gjson.Result, time.Duration, error) { + return kp.ProcessEndPoint(e) + }) +} + +// validateMatrix ensures that the previous matrix (prevMat) contains all the metrics present in the current matrix (curMat). +// This is crucial for performing accurate comparisons and calculations between the two matrices, especially in scenarios where +// the current matrix may have additional metrics that are not present in the previous matrix, such as after an ONTAP upgrade. +// +// The function iterates over all the metrics in curMat and checks if each metric exists in prevMat. If a metric from curMat +// does not exist in prevMat, it is created in prevMat as a new float64 metric. This prevents potential panics or errors +// when attempting to perform calculations with metrics that are missing in prevMat. +func (kp *KeyPerf) validateMatrix(prevMat *matrix.Matrix, curMat *matrix.Matrix) error { + var err error + for k := range curMat.GetMetrics() { + if prevMat.GetMetric(k) == nil { + _, err = prevMat.NewMetricFloat64(k) + if err != nil { + return err + } + } + } + return nil +} + +func (kp *KeyPerf) pollData( + startTime time.Time, + perfRecords []gjson.Result, + endpointFunc func(e *rest.EndPoint) ([]gjson.Result, time.Duration, error), +) (map[string]*matrix.Matrix, error) { + var ( + count uint64 + apiD, parseD time.Duration + err error + skips int + numPartials uint64 + instIndex int + prevMat *matrix.Matrix + curMat *matrix.Matrix + ) + + prevMat = kp.Matrix[kp.Object] + + // clone matrix without numeric data + curMat = prevMat.Clone(matrix.With{Data: false, Metrics: true, Instances: true, ExportInstances: true}) + curMat.Reset() + + apiD = time.Since(startTime) + + startTime = time.Now() + + if len(perfRecords) == 0 { + return nil, errs.New(errs.ErrNoInstance, "no "+kp.Object+" instances on cluster") + } + count, numPartials = kp.HandleResults(curMat, perfRecords, kp.Prop, false) + + // process endpoints + eCount, endpointAPID := kp.ProcessEndPoints(curMat, endpointFunc) + count += eCount + + parseD = time.Since(startTime) + _ = kp.Metadata.LazySetValueInt64("api_time", "data", (apiD + endpointAPID).Microseconds()) + _ = kp.Metadata.LazySetValueInt64("parse_time", "data", parseD.Microseconds()) + _ = kp.Metadata.LazySetValueUint64("metrics", "data", count) + _ = kp.Metadata.LazySetValueUint64("instances", "data", uint64(len(curMat.GetInstances()))) + _ = kp.Metadata.LazySetValueUint64("bytesRx", "data", kp.Client.Metadata.BytesRx) + _ = kp.Metadata.LazySetValueUint64("numCalls", "data", kp.Client.Metadata.NumCalls) + _ = kp.Metadata.LazySetValueUint64("numPartials", "data", numPartials) + + kp.AddCollectCount(count) + + // skip calculating from delta if no data from previous poll + if kp.perfProp.isCacheEmpty { + kp.Logger.Debug().Msg("skip postprocessing until next poll (previous cache empty)") + kp.Matrix[kp.Object] = curMat + kp.perfProp.isCacheEmpty = false + return nil, nil + } + + calcStart := time.Now() + + // cache raw data for next poll + cachedData := curMat.Clone(matrix.With{Data: true, Metrics: true, Instances: true, ExportInstances: true, PartialInstances: true}) + + orderedNonDenominatorMetrics := make([]*matrix.Metric, 0, len(curMat.GetMetrics())) + orderedNonDenominatorKeys := make([]string, 0, len(orderedNonDenominatorMetrics)) + + orderedDenominatorMetrics := make([]*matrix.Metric, 0, len(curMat.GetMetrics())) + orderedDenominatorKeys := make([]string, 0, len(orderedDenominatorMetrics)) + + counterMap := kp.perfProp.counterInfo + + for key, metric := range curMat.GetMetrics() { + counter := counterMap[key] + if counter != nil { + if counter.denominator == "" { + // does not require base counter + orderedNonDenominatorMetrics = append(orderedNonDenominatorMetrics, metric) + orderedNonDenominatorKeys = append(orderedNonDenominatorKeys, key) + } else { + // does require base counter + orderedDenominatorMetrics = append(orderedDenominatorMetrics, metric) + orderedDenominatorKeys = append(orderedDenominatorKeys, key) + } + } else { + kp.Logger.Warn().Str("counter", metric.GetName()).Msg("Counter is missing or unable to parse") + } + } + + timestamp := curMat.GetMetric(timestampMetricName) + if timestamp != nil { + timestamp.SetExportable(false) + } + err = kp.validateMatrix(prevMat, curMat) + if err != nil { + return nil, err + } + + // order metrics, such that those requiring base counters are processed last + orderedMetrics := orderedNonDenominatorMetrics + orderedMetrics = append(orderedMetrics, orderedDenominatorMetrics...) + orderedKeys := orderedNonDenominatorKeys + orderedKeys = append(orderedKeys, orderedDenominatorKeys...) + + var base *matrix.Metric + var totalSkips int + + for i, metric := range orderedMetrics { + key := orderedKeys[i] + counter := counterMap[key] + if counter == nil { + kp.Logger.Error().Err(err).Str("counter", metric.GetName()).Msg("Missing counter:") + continue + } + property := counter.counterType + // used in aggregator plugin + metric.SetProperty(property) + // used in volume.go plugin + metric.SetComment(counter.denominator) + + // raw/string - submit without post-processing + if property == "raw" || property == "string" { + continue + } + + // all other properties - first calculate delta + if skips, err = curMat.Delta(key, prevMat, kp.Logger); err != nil { + kp.Logger.Error().Err(err).Str("key", key).Msg("Calculate delta") + continue + } + totalSkips += skips + + // DELTA - subtract previous value from current + if property == "delta" { + // already done + continue + } + + // RATE - delta, normalized by elapsed time + if property == "rate" { + // defer calculation, so we can first calculate averages/percents + // Note: calculating rate before averages are averages/percentages are calculated + // used to be a bug in Harvest 2.0 (Alpha, RC1, RC2) resulting in very high latency values + continue + } + + // For the next two properties we need base counters + // We assume that delta of base counters is already calculated + if base = curMat.GetMetric(counter.denominator); base == nil { + kp.Logger.Warn(). + Str("key", key). + Str("property", property). + Str("denominator", counter.denominator). + Int("instIndex", instIndex). + Msg("Base counter missing") + continue + } + + // remaining properties: average and percent + // + // AVERAGE - delta, divided by base-counter delta + // + // PERCENT - average * 100 + // special case for latency counter: apply minimum number of iops as threshold + if property == "average" || property == "percent" { + + if strings.HasSuffix(metric.GetName(), "latency") { + skips, err = curMat.DivideWithThreshold(key, counter.denominator, kp.perfProp.latencyIoReqd, cachedData, prevMat, timestampMetricName, kp.Logger) + } else { + skips, err = curMat.Divide(key, counter.denominator) + } + + if err != nil { + kp.Logger.Error().Err(err).Str("key", key).Msg("Division by base") + continue + } + totalSkips += skips + + if property == "average" { + continue + } + } + + if property == "percent" { + if skips, err = curMat.MultiplyByScalar(key, 100); err != nil { + kp.Logger.Error().Err(err).Str("key", key).Msg("Multiply by scalar") + } else { + totalSkips += skips + } + continue + } + // If we reach here then one of the earlier clauses should have executed `continue` statement + kp.Logger.Error().Err(err). + Str("key", key). + Str("property", property). + Int("instIndex", instIndex). + Msg("Unknown property") + } + + // calculate rates (which we deferred to calculate averages/percents first) + for i, metric := range orderedMetrics { + key := orderedKeys[i] + counter := counterMap[key] + if counter == nil { + kp.Logger.Warn().Str("counter", metric.GetName()).Msg("Counter is missing or unable to parse ") + continue + } + property := counter.counterType + if property == "rate" { + if skips, err = curMat.Divide(orderedKeys[i], timestampMetricName); err != nil { + kp.Logger.Error().Err(err). + Int("i", i). + Str("metric", metric.GetName()). + Str("key", orderedKeys[i]). + Int("instIndex", instIndex). + Msg("Calculate rate") + continue + } + totalSkips += skips + } + } + + calcD := time.Since(calcStart) + _ = kp.Metadata.LazySetValueUint64("instances", "data", uint64(len(curMat.GetInstances()))) + _ = kp.Metadata.LazySetValueInt64("calc_time", "data", calcD.Microseconds()) + _ = kp.Metadata.LazySetValueUint64("skips", "data", uint64(totalSkips)) + + // store cache for next poll + kp.Matrix[kp.Object] = cachedData + + newDataMap := make(map[string]*matrix.Matrix) + newDataMap[kp.Object] = curMat + return newDataMap, nil +} + +// Interface guards +var ( + _ collector.Collector = (*KeyPerf)(nil) +) diff --git a/cmd/collectors/keyperf/keyperf_test.go b/cmd/collectors/keyperf/keyperf_test.go new file mode 100644 index 000000000..5cf51530d --- /dev/null +++ b/cmd/collectors/keyperf/keyperf_test.go @@ -0,0 +1,213 @@ +package keyperf + +import ( + "fmt" + "github.com/netapp/harvest/v2/cmd/collectors" + "github.com/netapp/harvest/v2/cmd/poller/collector" + "github.com/netapp/harvest/v2/cmd/poller/options" + "github.com/netapp/harvest/v2/pkg/conf" + "github.com/netapp/harvest/v2/pkg/matrix" + "github.com/netapp/harvest/v2/pkg/tree" + "github.com/netapp/harvest/v2/pkg/tree/node" + "sort" + "testing" + "time" +) + +const ( + pollerName = "test" +) + +func TestPartialAggregationSequence(t *testing.T) { + conf.TestLoadHarvestConfig("testdata/config.yml") + kp := newKeyPerf("Volume", "volume.yaml") + + // First Poll + t.Log("Running First Poll") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-1.json", 0, 0) + + // Complete Poll + t.Log("Running Complete Poll") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-2.json", 4, 48) + + // Partial Poll + t.Log("Running Partial Poll") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-partial.json", 4, 36) + + // Partial Poll 2 + t.Log("Running Partial Poll 2") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-partial.json", 4, 36) + if t.Failed() { + t.Fatal("Partial Poll 2 failed") + } + + // First Complete Poll After Partial + t.Log("Running First Complete Poll After Partial") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-3.json", 4, 36) + if t.Failed() { + t.Fatal("First Complete Poll After Partial failed") + } + + // Second Complete Poll After Partial + t.Log("Running First Complete Poll After Partial") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-3.json", 4, 48) + if t.Failed() { + t.Fatal("First Complete Poll After Partial failed") + } + + // Partial Poll 3 + t.Log("Running Partial Poll 3") + kp.testPollInstanceAndDataWithMetrics(t, "testdata/partialAggregation/volume-poll-partial-2.json", 4, 36) + if t.Failed() { + t.Fatal("Partial Poll 3 failed") + } +} + +func (kp *KeyPerf) testPollInstanceAndDataWithMetrics(t *testing.T, pollDataFile string, expectedExportedInst, expectedExportedMetrics int) *matrix.Matrix { + // Additional logic to count metrics + pollData := collectors.JSONToGson(pollDataFile, true) + now := time.Now().Truncate(time.Second) + data, err := kp.pollData(now, pollData, nil) + if err != nil { + t.Fatal(err) + } + + totalMetrics := 0 + exportableInstance := 0 + mat := data[kp.Object] + if mat != nil { + for _, instance := range mat.GetInstances() { + if instance.IsExportable() { + exportableInstance++ + } + } + for _, met := range mat.GetMetrics() { + if !met.IsExportable() { + continue + } + records := met.GetRecords() + for _, v := range records { + if v { + totalMetrics++ + } + } + } + } + + if exportableInstance != expectedExportedInst { + t.Errorf("Exported instances got=%d, expected=%d", exportableInstance, expectedExportedInst) + } + + // Check if the total number of metrics matches the expected value + if totalMetrics != expectedExportedMetrics { + t.Errorf("Total metrics got=%d, expected=%d", totalMetrics, expectedExportedMetrics) + } + return mat +} + +func TestKeyPerf_pollData(t *testing.T) { + conf.TestLoadHarvestConfig("testdata/config.yml") + tests := []struct { + name string + wantErr bool + pollDataPath1 string + pollDataPath2 string + counter string + sum int64 + numInstances int + numMetrics int + record bool + }{ + { + name: "statistics.iops_raw.read", + counter: "statistics.iops_raw.read", + pollDataPath1: "testdata/volume-poll-1.json", + pollDataPath2: "testdata/volume-poll-2.json", + numInstances: 4, + numMetrics: 48, + sum: 4608, + record: true, + }, + { + name: "statistics.latency_raw.read", + counter: "statistics.latency_raw.read", + pollDataPath1: "testdata/volume-poll-1.json", + pollDataPath2: "testdata/volume-poll-2.json", + numInstances: 4, + numMetrics: 48, + sum: 1114, + record: true, + }, + { + name: "statistics.latency_raw.read", + counter: "statistics.latency_raw.read", + pollDataPath1: "testdata/missingStats/volume-poll-1.json", + pollDataPath2: "testdata/missingStats/volume-poll-2.json", + numInstances: 1, + numMetrics: 0, + sum: 0, + record: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + + kp := newKeyPerf("Volume", "volume.yaml") + // First poll data + kp.testPollInstanceAndDataWithMetrics(t, tt.pollDataPath1, 0, 0) + // Complete Poll + m := kp.testPollInstanceAndDataWithMetrics(t, tt.pollDataPath2, tt.numInstances, tt.numMetrics) + + var sum int64 + var names []string + for n := range m.GetInstances() { + names = append(names, n) + } + sort.Strings(names) + metric := m.GetMetric(tt.counter) + for _, name := range names { + i := m.GetInstance(name) + val, recorded := metric.GetValueInt64(i) + if recorded != tt.record { + t.Errorf("pollData() recorded got=%v, want=%v", recorded, tt.record) + } + sum += val + } + if sum != tt.sum { + t.Errorf("pollData() sum got=%v, want=%v", sum, tt.sum) + } + }) + } +} + +func newKeyPerf(object string, path string) *KeyPerf { + var err error + opts := options.New(options.WithConfPath("testdata/conf")) + opts.Poller = pollerName + opts.HomePath = "testdata" + opts.IsTest = true + + ac := collector.New("KeyPerf", object, opts, params(object, path), nil) + kp := KeyPerf{} + err = kp.Init(ac) + if err != nil { + panic(err) + } + return &kp +} + +func params(object string, path string) *node.Node { + yml := ` +schedule: + - counter: 9999h + - data: 9999h +objects: + %s: %s +` + yml = fmt.Sprintf(yml, object, path) + root, err := tree.LoadYaml([]byte(yml)) + if err != nil { + panic(err) + } + return root +} diff --git a/cmd/collectors/keyperf/testdata/conf/keyperf/9.15.0/volume.yaml b/cmd/collectors/keyperf/testdata/conf/keyperf/9.15.0/volume.yaml new file mode 100644 index 000000000..039099e4a --- /dev/null +++ b/cmd/collectors/keyperf/testdata/conf/keyperf/9.15.0/volume.yaml @@ -0,0 +1,32 @@ +name: Volume +query: api/storage/volumes +object: volume + +counters: + - ^^name => volume + - ^^svm.name => svm + - ^style => style + - ^statistics.status => status + - statistics.timestamp(timestamp) => timestamp + - statistics.latency_raw.other => other_latency + - statistics.latency_raw.total => total_latency + - statistics.latency_raw.read => read_latency + - statistics.latency_raw.write => write_latency + - statistics.iops_raw.other => other_ops + - statistics.iops_raw.total => total_ops + - statistics.iops_raw.read => read_ops + - statistics.iops_raw.write => write_ops + - statistics.throughput_raw.other => other_data + - statistics.throughput_raw.total => total_data + - statistics.throughput_raw.read => read_data + - statistics.throughput_raw.write => write_data + - hidden_fields: + - statistics + +export_options: + instance_keys: + - aggr + - node + - style + - svm + - volume \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/config.yml b/cmd/collectors/keyperf/testdata/config.yml new file mode 100644 index 000000000..28b2c7d77 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/config.yml @@ -0,0 +1,14 @@ +Exporters: + prometheus: + exporter: Prometheus + port: 12990 + +Defaults: + collectors: + - KeyPerf + exporters: + - prometheus + +Pollers: + test: + addr: localhost \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/missingStats/volume-poll-1.json b/cmd/collectors/keyperf/testdata/missingStats/volume-poll-1.json new file mode 100644 index 000000000..43f8e7fb4 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/missingStats/volume-poll-1.json @@ -0,0 +1,28 @@ +{ + "records": [ + { + "uuid": "02d42517-2777-11ed-8553-00a098d390f2", + "name": "astra_302_m1", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "astra_302" + }, + "_links": { + "self": { + "href": "/api/storage/volumes/02d42517-2777-11ed-8553-00a098d390f2" + } + } + } + ], + "num_records": 1, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&name=astra_302_m1" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/missingStats/volume-poll-2.json b/cmd/collectors/keyperf/testdata/missingStats/volume-poll-2.json new file mode 100644 index 000000000..43f8e7fb4 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/missingStats/volume-poll-2.json @@ -0,0 +1,28 @@ +{ + "records": [ + { + "uuid": "02d42517-2777-11ed-8553-00a098d390f2", + "name": "astra_302_m1", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "astra_302" + }, + "_links": { + "self": { + "href": "/api/storage/volumes/02d42517-2777-11ed-8553-00a098d390f2" + } + } + } + ], + "num_records": 1, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&name=astra_302_m1" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-1.json b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-1.json new file mode 100644 index 000000000..4a2838252 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-1.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 1683212960, + "total": 1805863853967, + "read": 1792666552544, + "write": 11514088463 + }, + "iops_raw": { + "read": 421727167, + "write": 23780415, + "other": 7756631, + "total": 453264213 + }, + "throughput_raw": { + "read": 27610800432299, + "write": 16878751407, + "other": 0, + "total": 27627679183706 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 3074372739, + "total": 581121720811, + "read": 23294132617, + "write": 554753215455 + }, + "iops_raw": { + "read": 51000601, + "write": 696233919, + "other": 9018329, + "total": 756252849 + }, + "throughput_raw": { + "read": 5227391056384, + "write": 5364299272704, + "other": 8458203800, + "total": 10600148532888 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281143694823, + "read": 3281143453249, + "write": 241081 + }, + "iops_raw": { + "read": 7573155987, + "write": 507, + "other": 10, + "total": 7573156504 + }, + "throughput_raw": { + "read": 359800912150528, + "write": 2768896, + "other": 684, + "total": 359800914920108 + }, + "cloud": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-2.json b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-2.json new file mode 100644 index 000000000..c7166e768 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-2.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 1683283363, + "total": 1806048040887, + "read": 1792850352683, + "write": 11514404841 + }, + "iops_raw": { + "read": 422149966, + "write": 23781083, + "other": 7756970, + "total": 453688019 + }, + "throughput_raw": { + "read": 27638505460121, + "write": 16878899187, + "other": 0, + "total": 27655384359308 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 3074454451, + "total": 581135207905, + "read": 23294807146, + "write": 554765946308 + }, + "iops_raw": { + "read": 51002589, + "write": 696252014, + "other": 9018644, + "total": 756273247 + }, + "throughput_raw": { + "read": 5227527713280, + "write": 5364426738176, + "other": 8458447916, + "total": 10600412899372 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281220636912, + "read": 3281220395338, + "write": 241081 + }, + "iops_raw": { + "read": 7573381119, + "write": 507, + "other": 10, + "total": 7573381636 + }, + "throughput_raw": { + "read": 359808936796160, + "write": 2768896, + "other": 684, + "total": 359808939565740 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-3.json b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-3.json new file mode 100644 index 000000000..68f2d471e --- /dev/null +++ b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-3.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "latency_raw": { + "other": 1684141487, + "total": 1807757745878, + "read": 1794556883345, + "write": 11516721046 + }, + "iops_raw": { + "read": 423882268, + "write": 23785448, + "other": 7761115, + "total": 455428831 + }, + "throughput_raw": { + "read": 27751996757601, + "write": 16882341095, + "other": 0, + "total": 27768879098696 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "latency_raw": { + "other": 3074945849, + "total": 581232318721, + "read": 23300556457, + "write": 554856816415 + }, + "iops_raw": { + "read": 51021413, + "write": 696367528, + "other": 9020351, + "total": 756409292 + }, + "throughput_raw": { + "read": 5228845197312, + "write": 5365371138560, + "other": 8459859538, + "total": 10602676195410 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281717070006, + "read": 3281716828432, + "write": 241081 + }, + "iops_raw": { + "read": 7574900450, + "write": 507, + "other": 10, + "total": 7574900967 + }, + "throughput_raw": { + "read": 359851745419264, + "write": 2768896, + "other": 684, + "total": 359851748188844 + }, + "cloud": { + "timestamp": "2024-07-29T16:30:51Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial-2.json b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial-2.json new file mode 100644 index 000000000..8cb9195ad --- /dev/null +++ b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial-2.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "partial_other_error", + "latency_raw": { + "other": 1685256890, + "total": 1814115207916, + "read": 1800910140152, + "write": 11519810874 + }, + "iops_raw": { + "read": 424703714, + "write": 23792206, + "other": 7767020, + "total": 456262940 + }, + "throughput_raw": { + "read": 27805777044950, + "write": 16884015127, + "other": 0, + "total": 27822661060077 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "latency_raw": { + "other": 3075793810, + "total": 581374580529, + "read": 23310107535, + "write": 554988679184 + }, + "iops_raw": { + "read": 51051999, + "write": 696537806, + "other": 9023304, + "total": 756613109 + }, + "throughput_raw": { + "read": 5231079877632, + "write": 5366711808000, + "other": 8462283360, + "total": 10606253968992 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3282649976359, + "read": 3282649734785, + "write": 241081 + }, + "iops_raw": { + "read": 7578559232, + "write": 507, + "other": 10, + "total": 7578559749 + }, + "throughput_raw": { + "read": 359896897355776, + "write": 2768896, + "other": 684, + "total": 359896900125356 + }, + "cloud": { + "timestamp": "2024-07-29T16:54:13Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial.json b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial.json new file mode 100644 index 000000000..fef83a847 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/partialAggregation/volume-poll-partial.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "partial_other_error", + "latency_raw": { + "other": 1683283363, + "total": 1806048040887, + "read": 1792850352683, + "write": 11514404841 + }, + "iops_raw": { + "read": 422149966, + "write": 23781083, + "other": 7756970, + "total": 453688019 + }, + "throughput_raw": { + "read": 27638505460121, + "write": 16878899187, + "other": 0, + "total": 27655384359308 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 3074454451, + "total": 581135207905, + "read": 23294807146, + "write": 554765946308 + }, + "iops_raw": { + "read": 51002589, + "write": 696252014, + "other": 9018644, + "total": 756273247 + }, + "throughput_raw": { + "read": 5227527713280, + "write": 5364426738176, + "other": 8458447916, + "total": 10600412899372 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281220636912, + "read": 3281220395338, + "write": 241081 + }, + "iops_raw": { + "read": 7573381119, + "write": 507, + "other": 10, + "total": 7573381636 + }, + "throughput_raw": { + "read": 359808936796160, + "write": 2768896, + "other": 684, + "total": 359808939565740 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/volume-poll-1.json b/cmd/collectors/keyperf/testdata/volume-poll-1.json new file mode 100644 index 000000000..4a2838252 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/volume-poll-1.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 1683212960, + "total": 1805863853967, + "read": 1792666552544, + "write": 11514088463 + }, + "iops_raw": { + "read": 421727167, + "write": 23780415, + "other": 7756631, + "total": 453264213 + }, + "throughput_raw": { + "read": 27610800432299, + "write": 16878751407, + "other": 0, + "total": 27627679183706 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 3074372739, + "total": 581121720811, + "read": 23294132617, + "write": 554753215455 + }, + "iops_raw": { + "read": 51000601, + "write": 696233919, + "other": 9018329, + "total": 756252849 + }, + "throughput_raw": { + "read": 5227391056384, + "write": 5364299272704, + "other": 8458203800, + "total": 10600148532888 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281143694823, + "read": 3281143453249, + "write": 241081 + }, + "iops_raw": { + "read": 7573155987, + "write": 507, + "other": 10, + "total": 7573156504 + }, + "throughput_raw": { + "read": 359800912150528, + "write": 2768896, + "other": 684, + "total": 359800914920108 + }, + "cloud": { + "timestamp": "2024-07-29T16:14:56Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/keyperf/testdata/volume-poll-2.json b/cmd/collectors/keyperf/testdata/volume-poll-2.json new file mode 100644 index 000000000..c7166e768 --- /dev/null +++ b/cmd/collectors/keyperf/testdata/volume-poll-2.json @@ -0,0 +1,212 @@ +{ + "records": [ + { + "uuid": "03613247-54ac-4127-a512-afb331f1c207", + "name": "osc_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 1683283363, + "total": 1806048040887, + "read": 1792850352683, + "write": 11514404841 + }, + "iops_raw": { + "read": 422149966, + "write": 23781083, + "other": 7756970, + "total": 453688019 + }, + "throughput_raw": { + "read": 27638505460121, + "write": 16878899187, + "other": 0, + "total": 27655384359308 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/03613247-54ac-4127-a512-afb331f1c207" + } + } + }, + { + "uuid": "82f334bb-8b7a-11ed-86dd-00a098d390f2", + "name": "RahulTest", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + }, + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 39071916, + "total": 4392539949486, + "read": 4392500877570, + "write": 0 + }, + "iops_raw": { + "read": 13112828242, + "write": 0, + "other": 80387, + "total": 13112908629 + }, + "throughput_raw": { + "read": 859360898048009, + "write": 0, + "other": 0, + "total": 859360898048009 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + }, + "flexcache_raw": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "cache_miss_blocks": 20020, + "client_requested_blocks": 219548327119 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/82f334bb-8b7a-11ed-86dd-00a098d390f2" + } + } + }, + { + "uuid": "90456bbd-48c9-44d9-8d56-6958d3703a6f", + "name": "osc_iscsi_vol01", + "aggregates": [ + { + "name": "umeng_aff300_aggr2", + "uuid": "3e59547d-298a-4967-bd0f-8ae96cead08c" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 3074454451, + "total": 581135207905, + "read": 23294807146, + "write": 554765946308 + }, + "iops_raw": { + "read": 51002589, + "write": 696252014, + "other": 9018644, + "total": 756273247 + }, + "throughput_raw": { + "read": 5227527713280, + "write": 5364426738176, + "other": 8458447916, + "total": 10600412899372 + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/90456bbd-48c9-44d9-8d56-6958d3703a6f" + } + } + }, + { + "uuid": "d31cc5e3-fb3f-11ee-be42-00a098d390f2", + "name": "h_test", + "aggregates": [ + { + "name": "test1", + "uuid": "c1931ba8-bb35-4b12-84dc-1e0643487144" + } + ], + "svm": { + "name": "osc" + }, + "statistics": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "latency_raw": { + "other": 493, + "total": 3281220636912, + "read": 3281220395338, + "write": 241081 + }, + "iops_raw": { + "read": 7573381119, + "write": 507, + "other": 10, + "total": 7573381636 + }, + "throughput_raw": { + "read": 359808936796160, + "write": 2768896, + "other": 684, + "total": 359808939565740 + }, + "cloud": { + "timestamp": "2024-07-29T16:17:17Z", + "status": "ok", + "iops_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + }, + "latency_raw": { + "read": 0, + "write": 0, + "other": 0, + "total": 0 + } + } + }, + "_links": { + "self": { + "href": "/api/storage/volumes/d31cc5e3-fb3f-11ee-be42-00a098d390f2" + } + } + } + ], + "num_records": 4, + "_links": { + "self": { + "href": "/api/storage/volumes?return_records=true&fields=uuid,statistics,svm.name,aggregates&max_records=10&statistics.iops_raw.read=%3E0" + } + } +} \ No newline at end of file diff --git a/cmd/collectors/rest/rest.go b/cmd/collectors/rest/rest.go index 5afd7e6b2..2ff810d65 100644 --- a/cmd/collectors/rest/rest.go +++ b/cmd/collectors/rest/rest.go @@ -51,11 +51,11 @@ type Rest struct { *collector.AbstractCollector Client *rest.Client Prop *prop - endpoints []*endPoint + endpoints []*EndPoint isIgnoreUnknownFieldsEnabled bool } -type endPoint struct { +type EndPoint struct { prop *prop name string } @@ -94,7 +94,7 @@ func (r *Rest) HarvestModule() plugin.ModuleInfo { } } -func (r *Rest) query(p *endPoint) string { +func (r *Rest) query(p *EndPoint) string { return p.prop.Query } @@ -132,7 +132,7 @@ func (r *Rest) Fields(prop *prop) []string { return fields } -func (r *Rest) filter(p *endPoint) []string { +func (r *Rest) filter(p *EndPoint) []string { return p.prop.Filter } @@ -154,7 +154,7 @@ func (r *Rest) Init(a *collector.AbstractCollector) error { r.InitVars(a.Params) - if err := r.initEndPoints(); err != nil { + if err := r.InitEndPoints(); err != nil { return err } @@ -258,14 +258,14 @@ func (r *Rest) getClient(a *collector.AbstractCollector, c *auth.Credentials) (* return client, err } -func (r *Rest) initEndPoints() error { +func (r *Rest) InitEndPoints() error { endpoints := r.Params.GetChildS("endpoints") if endpoints != nil { for _, line := range endpoints.GetChildren() { n := line.GetNameS() - e := endPoint{name: n} + e := EndPoint{name: n} p := prop{} @@ -403,15 +403,15 @@ func (r *Rest) PollData() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrNoInstance, "no "+r.Object+" instances on cluster") } - return r.pollData(startTime, records, func(e *endPoint) ([]gjson.Result, time.Duration, error) { - return r.processEndPoint(e) + return r.pollData(startTime, records, func(e *EndPoint) ([]gjson.Result, time.Duration, error) { + return r.ProcessEndPoint(e) }) } func (r *Rest) pollData( startTime time.Time, records []gjson.Result, - endpointFunc func(e *endPoint) ([]gjson.Result, time.Duration, error), + endpointFunc func(e *EndPoint) ([]gjson.Result, time.Duration, error), ) (map[string]*matrix.Matrix, error) { var ( @@ -421,11 +421,12 @@ func (r *Rest) pollData( apiD = time.Since(startTime) startTime = time.Now() + mat := r.Matrix[r.Object] - count = r.HandleResults(records, r.Prop, false) + count, _ = r.HandleResults(mat, records, r.Prop, false) // process endpoints - eCount, endpointAPID := r.processEndPoints(endpointFunc) + eCount, endpointAPID := r.ProcessEndPoints(mat, endpointFunc) count += eCount parseD = time.Since(startTime) @@ -443,7 +444,7 @@ func (r *Rest) pollData( return r.Matrix, nil } -func (r *Rest) processEndPoint(e *endPoint) ([]gjson.Result, time.Duration, error) { +func (r *Rest) ProcessEndPoint(e *EndPoint) ([]gjson.Result, time.Duration, error) { now := time.Now() data, err := r.GetRestData(e.prop.Href) if err != nil { @@ -452,7 +453,7 @@ func (r *Rest) processEndPoint(e *endPoint) ([]gjson.Result, time.Duration, erro return data, time.Since(now), nil } -func (r *Rest) processEndPoints(endpointFunc func(e *endPoint) ([]gjson.Result, time.Duration, error)) (uint64, time.Duration) { +func (r *Rest) ProcessEndPoints(mat *matrix.Matrix, endpointFunc func(e *EndPoint) ([]gjson.Result, time.Duration, error)) (uint64, time.Duration) { var ( err error count uint64 @@ -477,7 +478,7 @@ func (r *Rest) processEndPoints(endpointFunc func(e *endPoint) ([]gjson.Result, r.Logger.Debug().Str("APIPath", endpoint.prop.Query).Msg("no instances on cluster") continue } - count = r.HandleResults(records, endpoint.prop, true) + count, _ = r.HandleResults(mat, records, endpoint.prop, true) } return count, totalAPID @@ -531,15 +532,15 @@ func (r *Rest) LoadPlugin(kind string, abc *plugin.AbstractPlugin) plugin.Plugin // HandleResults function is used for handling the rest response for parent as well as endpoints calls, // isEndPoint would be true only for the endpoint call, and it can't create/delete instance. -func (r *Rest) HandleResults(result []gjson.Result, prop *prop, isEndPoint bool) uint64 { +func (r *Rest) HandleResults(mat *matrix.Matrix, result []gjson.Result, prop *prop, isEndPoint bool) (uint64, uint64) { var ( - err error - count uint64 + err error + count uint64 + numPartials uint64 ) oldInstances := set.New() currentInstances := set.New() - mat := r.Matrix[r.Object] // copy keys of current instances. This is used to remove deleted instances from matrix later for key := range mat.GetInstances() { @@ -616,6 +617,16 @@ func (r *Rest) HandleResults(result []gjson.Result, prop *prop, isEndPoint bool) } } + // This is relevant for the KeyPerf collector. + // If the `statistics.status` is not OK, then set `partial` to true. + if mat.UUID == "KeyPerf" { + status := instanceData.Get("statistics.status") + if status.Exists() && status.String() != "ok" { + instance.SetPartial(true) + numPartials++ + } + } + for _, metric := range prop.Metrics { metr, ok := mat.GetMetrics()[metric.Name] if !ok { @@ -661,7 +672,7 @@ func (r *Rest) HandleResults(result []gjson.Result, prop *prop, isEndPoint bool) } } - return count + return count, numPartials } func (r *Rest) GetRestData(href string) ([]gjson.Result, error) { diff --git a/cmd/collectors/rest/rest_test.go b/cmd/collectors/rest/rest_test.go index e219d647e..6251cbe39 100644 --- a/cmd/collectors/rest/rest_test.go +++ b/cmd/collectors/rest/rest_test.go @@ -142,7 +142,7 @@ func Test_pollDataVolume(t *testing.T) { } } -func volumeEndpoints(e *endPoint) ([]gjson.Result, time.Duration, error) { +func volumeEndpoints(e *EndPoint) ([]gjson.Result, time.Duration, error) { path := "testdata/" + strings.ReplaceAll(e.prop.Query, "/", "-") + ".json.gz" gson := collectors.JSONToGson(path, true) return gson, 0, nil diff --git a/cmd/collectors/restperf/restperf.go b/cmd/collectors/restperf/restperf.go index b552921fe..b0f210ed5 100644 --- a/cmd/collectors/restperf/restperf.go +++ b/cmd/collectors/restperf/restperf.go @@ -38,6 +38,7 @@ const ( arrayKeyToken = "#" objWorkloadClass = "user_defined|system_defined" objWorkloadVolumeClass = "autovolume" + timestampMetricName = "timestamp" ) var ( @@ -389,8 +390,8 @@ func (r *RestPerf) pollCounter(records []gjson.Result, apiD time.Duration) (map[ // Create an artificial metric to hold timestamp of each instance data. // The reason we don't keep a single timestamp for the whole data // is because we might get instances in different batches - if mat.GetMetric("timestamp") == nil { - m, err := mat.NewMetricFloat64("timestamp") + if mat.GetMetric(timestampMetricName) == nil { + m, err := mat.NewMetricFloat64(timestampMetricName) if err != nil { r.Logger.Error().Err(err).Msg("add timestamp metric") } @@ -694,7 +695,7 @@ func (r *RestPerf) PollData() (map[string]*matrix.Matrix, error) { return nil, errs.New(errs.ErrNoInstance, "no "+r.Object+" instances fetched in PollInstance") } - timestamp := r.Matrix[r.Object].GetMetric("timestamp") + timestamp := r.Matrix[r.Object].GetMetric(timestampMetricName) if timestamp == nil { return nil, errs.New(errs.ErrConfig, "missing timestamp metric") } @@ -1069,7 +1070,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) r.Logger.Warn().Str("counter", name).Msg("Counter is missing or unable to parse.") } } - if err = curMat.GetMetric("timestamp").SetValueFloat64(instance, ts); err != nil { + if err = curMat.GetMetric(timestampMetricName).SetValueFloat64(instance, ts); err != nil { r.Logger.Error().Err(err).Msg("Failed to set timestamp") } @@ -1115,7 +1116,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) orderedDenominatorKeys := make([]string, 0, len(orderedDenominatorMetrics)) for key, metric := range curMat.GetMetrics() { - if metric.GetName() != "timestamp" && metric.Buckets() == nil { + if metric.GetName() != timestampMetricName && metric.Buckets() == nil { counter := r.counterLookup(metric, key) if counter != nil { if counter.denominator == "" { @@ -1215,7 +1216,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) if property == "average" || property == "percent" { if strings.HasSuffix(metric.GetName(), "latency") { - skips, err = curMat.DivideWithThreshold(key, counter.denominator, r.perfProp.latencyIoReqd, cachedData, prevMat, r.Logger) + skips, err = curMat.DivideWithThreshold(key, counter.denominator, r.perfProp.latencyIoReqd, cachedData, prevMat, timestampMetricName, r.Logger) } else { skips, err = curMat.Divide(key, counter.denominator) } @@ -1254,7 +1255,7 @@ func (r *RestPerf) pollData(startTime time.Time, perfRecords []rest.PerfRecord) if counter != nil { property := counter.counterType if property == "rate" { - if skips, err = curMat.Divide(orderedKeys[i], "timestamp"); err != nil { + if skips, err = curMat.Divide(orderedKeys[i], timestampMetricName); err != nil { r.Logger.Error().Err(err). Int("i", i). Str("metric", metric.GetName()). diff --git a/cmd/collectors/zapiperf/zapiperf.go b/cmd/collectors/zapiperf/zapiperf.go index 1a57fba81..f5cbd2382 100644 --- a/cmd/collectors/zapiperf/zapiperf.go +++ b/cmd/collectors/zapiperf/zapiperf.go @@ -67,6 +67,7 @@ const ( objWorkloadClass = "user_defined|system_defined" objWorkloadVolumeClass = "autovolume" BILLION = 1_000_000_000 + timestampMetricName = "timestamp" ) var workloadDetailMetrics = []string{"resource_latency"} @@ -396,7 +397,7 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { curMat := prevMat.Clone(matrix.With{Data: false, Metrics: true, Instances: true, ExportInstances: false}) curMat.Reset() - timestamp := curMat.GetMetric("timestamp") + timestamp := curMat.GetMetric(timestampMetricName) if timestamp == nil { return nil, errs.New(errs.ErrConfig, "missing timestamp metric") // @TODO errconfig?? } @@ -775,7 +776,7 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { // calculate timestamp delta first since many counters require it for postprocessing. // Timestamp has "raw" property, so it isn't post-processed automatically - if _, err = curMat.Delta("timestamp", prevMat, z.Logger); err != nil { + if _, err = curMat.Delta(timestampMetricName, prevMat, z.Logger); err != nil { z.Logger.Error().Err(err).Msg("(timestamp) calculate delta:") // @TODO terminate since other counters will be incorrect } @@ -842,7 +843,7 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { if property == "average" || property == "percent" { if strings.HasSuffix(metric.GetName(), "latency") { - skips, err = curMat.DivideWithThreshold(key, metric.GetComment(), z.latencyIoReqd, cachedData, prevMat, z.Logger) + skips, err = curMat.DivideWithThreshold(key, metric.GetComment(), z.latencyIoReqd, cachedData, prevMat, timestampMetricName, z.Logger) } else { skips, err = curMat.Divide(key, metric.GetComment()) } @@ -874,7 +875,7 @@ func (z *ZapiPerf) PollData() (map[string]*matrix.Matrix, error) { // calculate rates (which we deferred to calculate averages/percents first) for i, metric := range orderedMetrics { if metric.GetProperty() == "rate" { - if skips, err = curMat.Divide(orderedKeys[i], "timestamp"); err != nil { + if skips, err = curMat.Divide(orderedKeys[i], timestampMetricName); err != nil { z.Logger.Error().Err(err). Int("i", i). Str("key", orderedKeys[i]). @@ -1185,8 +1186,8 @@ func (z *ZapiPerf) PollCounter() (map[string]*matrix.Matrix, error) { // Create an artificial metric to hold timestamp of each instance data. // The reason we don't keep a single timestamp for the whole data // is because we might get instances in different batches - if !oldMetrics.Has("timestamp") { - m, err := mat.NewMetricFloat64("timestamp") + if !oldMetrics.Has(timestampMetricName) { + m, err := mat.NewMetricFloat64(timestampMetricName) if err != nil { z.Logger.Error().Err(err).Msg("add timestamp metric") } @@ -1275,7 +1276,7 @@ func (z *ZapiPerf) PollCounter() (map[string]*matrix.Matrix, error) { for key := range oldMetrics.Iter() { // temporary fix: prevent removing array counters // @TODO - if key != "timestamp" && !strings.Contains(key, ".") { + if key != timestampMetricName && !strings.Contains(key, ".") { mat.RemoveMetric(key) z.Logger.Debug().Msgf("removed metric [%s]", key) } diff --git a/cmd/poller/poller.go b/cmd/poller/poller.go index a9221d670..be0f8f81b 100644 --- a/cmd/poller/poller.go +++ b/cmd/poller/poller.go @@ -31,6 +31,7 @@ import ( "errors" "fmt" _ "github.com/netapp/harvest/v2/cmd/collectors/ems" + _ "github.com/netapp/harvest/v2/cmd/collectors/keyperf" _ "github.com/netapp/harvest/v2/cmd/collectors/restperf" _ "github.com/netapp/harvest/v2/cmd/collectors/simple" _ "github.com/netapp/harvest/v2/cmd/collectors/storagegrid" diff --git a/conf/keyperf/9.15.0/volume.yaml b/conf/keyperf/9.15.0/volume.yaml new file mode 100644 index 000000000..d1eeee28c --- /dev/null +++ b/conf/keyperf/9.15.0/volume.yaml @@ -0,0 +1,46 @@ +name: Volume +query: api/storage/volumes +object: volume + +counters: + - ^^name => volume + - ^^svm.name => svm + - ^style => style + - ^statistics.status => status + - statistics.timestamp(timestamp) => timestamp + - statistics.latency_raw.other => other_latency + - statistics.latency_raw.total => total_latency + - statistics.latency_raw.read => read_latency + - statistics.latency_raw.write => write_latency + - statistics.iops_raw.other => other_ops + - statistics.iops_raw.total => total_ops + - statistics.iops_raw.read => read_ops + - statistics.iops_raw.write => write_ops + - statistics.throughput_raw.other => other_data + - statistics.throughput_raw.total => total_data + - statistics.throughput_raw.read => read_data + - statistics.throughput_raw.write => write_data + - hidden_fields: + - statistics + +endpoints: + - query: api/private/cli/volume + counters: + - ^^volume => volume + - ^^vserver => svm + - ^aggr_list => aggr + - ^nodes => node + +plugins: + - Aggregator: + # plugin will create summary/average for each object + # any names after the object names will be treated as label names that will be added to instances + - node + +export_options: + instance_keys: + - aggr + - node + - style + - svm + - volume \ No newline at end of file diff --git a/conf/keyperf/default.yaml b/conf/keyperf/default.yaml new file mode 100644 index 000000000..8138417ad --- /dev/null +++ b/conf/keyperf/default.yaml @@ -0,0 +1,9 @@ +collector: KeyPerf + +# Order here matters! +schedule: + - counter: 24h + - data: 1m + +objects: + Volume: volume.yaml \ No newline at end of file diff --git a/pkg/matrix/matrix.go b/pkg/matrix/matrix.go index 8f20f6bf0..1881a5db1 100644 --- a/pkg/matrix/matrix.go +++ b/pkg/matrix/matrix.go @@ -441,7 +441,7 @@ func (m *Matrix) Divide(metricKey string, baseKey string) (int, error) { } // DivideWithThreshold applicable for latency counters -func (m *Matrix) DivideWithThreshold(metricKey string, baseKey string, threshold int, curRawMat *Matrix, prevRawMat *Matrix, logger *logging.Logger) (int, error) { +func (m *Matrix) DivideWithThreshold(metricKey string, baseKey string, threshold int, curRawMat *Matrix, prevRawMat *Matrix, timestampMetricName string, logger *logging.Logger) (int, error) { var skips int x := float64(threshold) curRawMetric := curRawMat.GetMetric(metricKey) @@ -450,7 +450,7 @@ func (m *Matrix) DivideWithThreshold(metricKey string, baseKey string, threshold prevBaseRawMetric := prevRawMat.GetMetric(baseKey) metric := m.GetMetric(metricKey) base := m.GetMetric(baseKey) - time := m.GetMetric("timestamp") + time := m.GetMetric(timestampMetricName) var tValues []float64 if time != nil { tValues = time.values diff --git a/pkg/matrix/metric_test.go b/pkg/matrix/metric_test.go index 75a4a1336..495ddd407 100644 --- a/pkg/matrix/metric_test.go +++ b/pkg/matrix/metric_test.go @@ -284,7 +284,7 @@ func TestMetricFloat64_DivideWithThreshold(t *testing.T) { } } - skips, err := curMat.DivideWithThreshold(latency, "total_ops", tt.threshold, cachedData, prevMat, logging.Get()) + skips, err := curMat.DivideWithThreshold(latency, "total_ops", tt.threshold, cachedData, prevMat, "timestamp", logging.Get()) matrixTestAdv(t, tt, curMat, skips, err, latency) }) } diff --git a/pkg/util/util.go b/pkg/util/util.go index bae63fb10..fb4f8ad97 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -31,6 +31,7 @@ var IsCollector = map[string]struct{}{ "Zapi": {}, "Rest": {}, "RestPerf": {}, + "KeyPerf": {}, "Ems": {}, "StorageGrid": {}, "Unix": {},