diff --git a/pkg/monitoring/metrics/forklift-controller/metrics.go b/pkg/monitoring/metrics/forklift-controller/metrics.go
index f0c009a77..9146fd0b9 100644
--- a/pkg/monitoring/metrics/forklift-controller/metrics.go
+++ b/pkg/monitoring/metrics/forklift-controller/metrics.go
@@ -8,7 +8,6 @@ import (
 const (
 	Succeeded = "Succeeded"
 	Failed    = "Failed"
-	Executing = "Executing"
 	Running   = "Running"
 	Pending   = "Pending"
 	Canceled  = "Canceled"
@@ -22,12 +21,12 @@ const (
 )
 
 var (
-	// 'status' - [ Succeeded, Failed, Executing, Canceled]
+	// 'status' - [ Succeeded, Failed, Canceled]
 	// 'provider' - [oVirt, VSphere, Openstack, OVA, Openshift]
 	// 'mode' - [Cold, Warm]
 	// 'target' - [Local, Remote]
-	migrationStatusGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
-		Name: "mtv_migrations_status",
+	migrationStatusCounter = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "mtv_migrations_status_total",
 		Help: "VM Migrations sorted by status, provider, mode and destination",
 	},
 		[]string{
@@ -82,13 +81,13 @@ var (
 		},
 	)
 
-	// 'status' - [ Succeeded, Failed, Executing, Canceled]
+	// 'status' - [ Succeeded, Failed, Canceled]
 	// 'provider' - [oVirt, VSphere, Openstack, OVA, Openshift]
 	// 'mode' - [Cold, Warm]
 	// 'target' - [Local, Remote]
 	// 'plan' - [Id]
-	migrationPlanCorrelationStatusGauge = promauto.NewGaugeVec(prometheus.GaugeOpts{
-		Name: "mtv_workload_migrations_status",
+	migrationPlanCorrelationStatusCounter = promauto.NewCounterVec(prometheus.CounterOpts{
+		Name: "mtv_workload_migrations_status_total",
 		Help: "VM Migrations status by provider, mode, destination and plan",
 	},
 		[]string{
diff --git a/pkg/monitoring/metrics/forklift-controller/migration_metrics.go b/pkg/monitoring/metrics/forklift-controller/migration_metrics.go
index 23252ede0..b30530d9e 100644
--- a/pkg/monitoring/metrics/forklift-controller/migration_metrics.go
+++ b/pkg/monitoring/metrics/forklift-controller/migration_metrics.go
@@ -3,7 +3,6 @@ package forklift_controller
 import (
 	"context"
 	"fmt"
-	"strings"
 	"time"
 
 	api "github.com/konveyor/forklift-controller/pkg/apis/forklift/v1beta1"
@@ -11,7 +10,11 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/client"
 )
 
-var processedSucceededMigrations = make(map[string]struct{})
+var (
+	processedSucceededMigrations = make(map[string]struct{})
+	processedFailedMigrations    = make(map[string]struct{})
+	processedCanceledMigrations  = make(map[string]struct{})
+)
 
 // Calculate Migrations metrics every 10 seconds
 func RecordMigrationMetrics(c client.Client) {
@@ -29,9 +32,6 @@ func RecordMigrationMetrics(c client.Client) {
 				continue
 			}
 
-			// Initialize or reset the counter map at the beginning of each iteration
-			counterMap := make(map[string]float64)
-
 			for _, m := range migrations.Items {
 				plan := api.Plan{}
 				err := c.Get(context.TODO(), client.ObjectKey{Namespace: m.Spec.Plan.Namespace, Name: m.Spec.Plan.Name}, &plan)
@@ -54,7 +54,7 @@ func RecordMigrationMetrics(c client.Client) {
 				isLocal := destProvider.Spec.URL == ""
 				isWarm := plan.Spec.Warm
 
-				var target, mode, key string
+				var target, mode string
 				if isLocal {
 					target = Local
 				} else {
@@ -67,53 +67,64 @@ func RecordMigrationMetrics(c client.Client) {
 				}
 
 				provider := sourceProvider.Type().String()
-
-				if m.Status.HasCondition(Succeeded) {
-					key = fmt.Sprintf("%s|%s|%s|%s|%s", Succeeded, provider, mode, target, string(plan.UID))
-					counterMap[key]++
-
-					startTime := m.Status.Started.Time
-					endTime := m.Status.Completed.Time
-					duration := endTime.Sub(startTime).Seconds()
-
-					var totalDataTransferred float64
-					for _, vm := range m.Status.VMs {
-						for _, step := range vm.Pipeline {
-							if step.Name == "DiskTransferV2v" || step.Name == "DiskTransfer" {
-								for _, task := range step.Tasks {
-									totalDataTransferred += float64(task.Progress.Completed) * 1024 * 1024 // convert to Bytes
-								}
-							}
-						}
-					}
-
-					// Set the metrics for duration and data transferred and update the map for scaned migration
-					if _, exists := processedSucceededMigrations[string(m.UID)]; !exists {
-						migrationDurationGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": string(plan.UID)}).Set(duration)
-						migrationDurationHistogram.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target}).Observe(duration)
-						dataTransferredGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": string(plan.UID)}).Set(totalDataTransferred)
-						processedSucceededMigrations[string(m.UID)] = struct{}{}
-					}
-				}
-				if m.Status.HasCondition(Failed) {
-					key = fmt.Sprintf("%s|%s|%s|%s|%s", Failed, provider, mode, target, string(plan.UID))
-					counterMap[key]++
-				}
-				if m.Status.HasCondition(Executing) {
-					key = fmt.Sprintf("%s|%s|%s|%s|%s", Executing, provider, mode, target, string(plan.UID))
-					counterMap[key]++
-				}
-				if m.Status.HasCondition(Canceled) {
-					key = fmt.Sprintf("%s|%s|%s|%s|%s", Canceled, provider, mode, target, string(plan.UID))
-					counterMap[key]++
-				}
+				processMigration(m, provider, mode, target, string(plan.UID))
 			}
+		}
+	}()
+}
 
-			for key, value := range counterMap {
-				parts := strings.Split(key, "|")
-				migrationStatusGauge.With(prometheus.Labels{"status": parts[0], "provider": parts[1], "mode": parts[2], "target": parts[3]}).Set(value)
-				migrationPlanCorrelationStatusGauge.With(prometheus.Labels{"status": parts[0], "provider": parts[1], "mode": parts[2], "target": parts[3], "plan": parts[4]}).Set(value)
+func processMigration(migration api.Migration, provider, mode, target, planUID string) {
+	var (
+		processedMigrations map[string]struct{}
+		status              string
+	)
+
+	switch {
+	case migration.Status.HasCondition(Succeeded):
+		processedMigrations = processedSucceededMigrations
+		status = Succeeded
+	case migration.Status.HasCondition(Failed):
+		processedMigrations = processedFailedMigrations
+		status = Failed
+	case migration.Status.HasCondition(Canceled):
+		processedMigrations = processedCanceledMigrations
+		status = Canceled
+	default:
+		// otherwise, there's nothing to do with the current state of the migration
+		return
+	}
+
+	if _, exists := processedMigrations[string(migration.UID)]; !exists {
+		updateMetricsCount(status, provider, mode, target, planUID)
+		if status == Succeeded {
+			recordSuccessfulMigrationMetrics(migration, provider, mode, target, planUID)
+		}
+		processedMigrations[string(migration.UID)] = struct{}{}
+	}
+}
+
+func updateMetricsCount(status, provider, mode, target, plan string) {
+	migrationStatusCounter.With(prometheus.Labels{"status": status, "provider": provider, "mode": mode, "target": target}).Inc()
+	migrationPlanCorrelationStatusCounter.With(prometheus.Labels{"status": status, "provider": provider, "mode": mode, "target": target, "plan": plan}).Inc()
+}
+
+func recordSuccessfulMigrationMetrics(migration api.Migration, provider, mode, target, planUID string) {
+	startTime := migration.Status.Started.Time
+	endTime := migration.Status.Completed.Time
+	duration := endTime.Sub(startTime).Seconds()
+
+	var totalDataTransferred float64
+	for _, vm := range migration.Status.VMs {
+		for _, step := range vm.Pipeline {
+			if step.Name == "DiskTransferV2v" || step.Name == "DiskTransfer" {
+				for _, task := range step.Tasks {
+					totalDataTransferred += float64(task.Progress.Completed) * 1024 * 1024 // convert to Bytes
+				}
 			}
 		}
-	}()
+	}
+
+	migrationDurationGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": planUID}).Set(duration)
+	migrationDurationHistogram.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target}).Observe(duration)
+	dataTransferredGauge.With(prometheus.Labels{"provider": provider, "mode": mode, "target": target, "plan": planUID}).Set(totalDataTransferred)
 }
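
The behavioral core of this diff is the move from gauges recomputed from an in-memory counterMap on every 10-second tick to monotonic *_total counters that are incremented exactly once per migration UID when it reaches a terminal condition. Below is a minimal, self-contained Go sketch of that pattern using only the upstream prometheus/client_golang API; the local registry, the demo_migrations_status_total name, the processed set, and recordTerminal are illustrative stand-ins, not code from this repository.

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

// Local registry so the sketch does not touch the default one.
var registry = prometheus.NewRegistry()

// Counter with a *_total name, mirroring mtv_migrations_status_total.
var statusTotal = promauto.With(registry).NewCounterVec(prometheus.CounterOpts{
	Name: "demo_migrations_status_total",
	Help: "Migrations that reached a terminal status",
}, []string{"status", "provider"})

// processed plays the role of processedSucceededMigrations and friends:
// one entry per migration UID that has already been counted.
var processed = map[string]struct{}{}

// recordTerminal increments the counter once per UID, no matter how often
// the polling loop re-observes the same finished migration.
func recordTerminal(uid, status, provider string) {
	if _, seen := processed[uid]; seen {
		return
	}
	statusTotal.WithLabelValues(status, provider).Inc()
	processed[uid] = struct{}{}
}

func main() {
	// The same succeeded migration observed on three consecutive ticks.
	for i := 0; i < 3; i++ {
		recordTerminal("uid-1", "Succeeded", "vsphere")
	}
	recordTerminal("uid-2", "Failed", "ovirt")

	fmt.Println(testutil.ToFloat64(statusTotal.WithLabelValues("Succeeded", "vsphere"))) // 1
	fmt.Println(testutil.ToFloat64(statusTotal.WithLabelValues("Failed", "ovirt")))      // 1
}
```

Because the series are now monotonic counters rather than gauges, consumers that previously read mtv_migrations_status directly would typically switch to rate()/increase() style queries over the renamed *_total series.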