From 81c12f5ef3a60463f46a5d6e089885759d146836 Mon Sep 17 00:00:00 2001 From: gaoxihui Date: Thu, 2 Nov 2023 15:09:42 +0800 Subject: [PATCH] optimize metric query for dubboProviderSLA --- .../controller/PrometheusController.java | 4 +- .../controller/model/PromQueryRangeParam.java | 1 + .../mone/monitor/bo/AlarmCheckDataCount.java | 13 +++- .../ComputeTimerServiceExtensionImpl.java | 24 +++--- .../service/model/prometheus/Metric.java | 2 + .../service/prometheus/PrometheusService.java | 75 +++++++++++++++++-- 6 files changed, 96 insertions(+), 23 deletions(-) diff --git a/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/PrometheusController.java b/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/PrometheusController.java index 31ba98f53..4dd1ff8d6 100644 --- a/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/PrometheusController.java +++ b/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/PrometheusController.java @@ -111,7 +111,7 @@ public Result querySumOverTime(@RequestBody PromQueryRangeParam param) log.info("PrometheusController.queryRange request afterConvert Param startTime : {} ,endTime : {} ,step : {},projectName : {},metricSuffix : {}",startTime,endTime,step,projectName,metricSuffix); String pDuration = duration + "s"; - return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration); + return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration,param.getSumBy()); } @ResponseBody @@ -156,7 +156,7 @@ public Result queryIncrease(@RequestBody PromQueryRangeParam param){ String pDuration = duration + "s"; - Result pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration); + Result pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration,param.getSumBy()); return pageDataResult; diff --git a/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/model/PromQueryRangeParam.java b/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/model/PromQueryRangeParam.java index 0f694e987..500ab7d7f 100644 --- a/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/model/PromQueryRangeParam.java +++ b/ozhera-monitor/ozhera-monitor-server/src/main/java/com/xiaomi/mone/monitor/controller/model/PromQueryRangeParam.java @@ -24,4 +24,5 @@ public class PromQueryRangeParam implements Serializable { Long startTime; Long endTime; Long step; + String sumBy; } diff --git a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/bo/AlarmCheckDataCount.java b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/bo/AlarmCheckDataCount.java index 52b9e2049..3054f3a10 100644 --- a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/bo/AlarmCheckDataCount.java +++ b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/bo/AlarmCheckDataCount.java @@ -10,10 +10,15 @@ public enum AlarmCheckDataCount { zero("0","立即触发"), - one("1","持续30s"), - two("2","持续60s"), - three("3","持续90s"), - five("5","持续150s"); + one("1","持续20s"), + two("2","持续40s"), + three("3","持续60s"), + five("5","持续100s"), + six("6","持续120s"), + seven("7","持续140s"), + eight("8","持续160s"), + nine("9","持续180s"), + fifteen("15","持续5m"); private String code; private String message; diff --git a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/ComputeTimerServiceExtensionImpl.java b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/ComputeTimerServiceExtensionImpl.java index e5d1db463..38e0e3f9d 100644 --- a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/ComputeTimerServiceExtensionImpl.java +++ b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/ComputeTimerServiceExtensionImpl.java @@ -49,19 +49,19 @@ public void computByMetricType(AppMonitorRequest param, String appName, MetricKi case http: // http请求异常统计 - Result httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.httpExceptionNum(countRecordMetric(httpExceptions)); // httpClient请求异常统计 - Result httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.httpClientExceptionNum(countRecordMetric(httpClientExceptions)); // http请求慢查询统计 - Result httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.httpSlowNum(countRecordMetric(httpSlowQuery)); // httpClient请求慢查询统计 - Result httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.httpClientSlowNum(countRecordMetric(httpClientSlowQuerys)); break; @@ -69,17 +69,17 @@ public void computByMetricType(AppMonitorRequest param, String appName, MetricKi case dubbo: // dubbo请求异常统计 - Result dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.dubboExceptionNum(countRecordMetric(dubboExceptions)); // dubbo请求异常统计 - Result dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.dubboPExceptionNum(countRecordMetric(dubboPExceptions)); // dubbo consumer慢请求统计 - Result dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.dubboCSlowQueryNum(countRecordMetric(dubboConsumerSlowQuerys)); log.info("projectName:{},dubboConsumerSlowQuerys:{}",appName,dubboConsumerSlowQuerys); // dubbo provider慢请求统计 - Result dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.dubboProviderSlowQueryNum(countRecordMetric(dubboProviderSlowQuerys)); log.info("projectName:{},dubboProviderSlowQuerys:{}",appName,dubboProviderSlowQuerys); @@ -87,19 +87,19 @@ public void computByMetricType(AppMonitorRequest param, String appName, MetricKi case db : // mysql请求异常统计 - Result sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.sqlExceptionNum(countRecordMetric(sqlExceptions)); // mysql慢请求统计 - Result sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.sqlSlowQueryNum(countRecordMetric(sqlSlowQuerys)); break; case redis : // redis请求异常统计 - Result redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.redisExceptionNum(countRecordMetric(redisExceptions)); - Result redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion); + Result redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null); dataBuilder.redisSlowNum(countRecordMetric(redisSlowQuerys)); break; diff --git a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/model/prometheus/Metric.java b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/model/prometheus/Metric.java index ce050bd3e..ed513b1df 100644 --- a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/model/prometheus/Metric.java +++ b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/model/prometheus/Metric.java @@ -1,5 +1,6 @@ package com.xiaomi.mone.monitor.service.model.prometheus; +import com.fasterxml.jackson.annotation.JsonInclude; import lombok.Data; import java.io.Serializable; @@ -10,6 +11,7 @@ * @date 2021/8/16 11:42 上午 */ @Data +@JsonInclude(JsonInclude.Include.NON_NULL) public class Metric implements Serializable { private String application; diff --git a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/prometheus/PrometheusService.java b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/prometheus/PrometheusService.java index 00edc13da..b7e012bd3 100644 --- a/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/prometheus/PrometheusService.java +++ b/ozhera-monitor/ozhera-monitor-service/src/main/java/com/xiaomi/mone/monitor/service/prometheus/PrometheusService.java @@ -122,7 +122,7 @@ public Result queryRange(String metric_, Map labels, String projectNam } - public Result queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration) { + public Result queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration,String sumBy) { String offset = null; Long offsetLong = System.currentTimeMillis() / 1000 - endTime; @@ -133,9 +133,9 @@ public Result queryRangeSumOverTime(String metric_, Map labels, String endTime = System.currentTimeMillis() / 1000; // 指标名称拼接 - String metric = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset); + String metricSource = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset); - String sumOverTimeFunc = sumSumOverTimeFunc(metric); + String sumOverTimeFunc = sumSumOverTimeFunc(metricSource,metric_,sumBy); log.info("PrometheusService.queryRangeSumOverTime sumOverTimeFunc : {} ", sumOverTimeFunc); Map map = new HashMap<>(); @@ -344,7 +344,13 @@ private List convertTeslaMetric(List result) { private List convertValidMetric(List result) { List list = new ArrayList<>(); if (!CollectionUtils.isEmpty(result)) { + int count = 0; for (MetricDataSetVector metricDataVector : result) { + + if(count > 1000){ + break; + } + Metric metric = metricDataVector.getMetric(); if (Double.valueOf(metricDataVector.getValue().get(1)) == 0d) { continue; @@ -362,6 +368,7 @@ private List convertValidMetric(List result) { } list.add(metric); + count++; } } @@ -440,12 +447,67 @@ private String sumOverTimeFunc(String source) { * @param source * @return */ - private String sumSumOverTimeFunc(String source) { + private String sumSumOverTimeFunc(String source,String metric,String sumBy) { StringBuilder sb = new StringBuilder(); sb.append("sum(sum_over_time("); sb.append(source); - sb.append(")) by (serverIp,job,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv,clientIp) "); + sb.append(")) "); + if (StringUtils.isNotBlank(sumBy)) { + sb.append(" by (").append(sumBy).append( ")"); + }else { + switch (metric) { + case "dubboProviderSLAError": + sb.append(" by (application,methodName,serviceName,serverEnv,serverZone,clientProjectName,clientEnv) "); + break; + case "dubboConsumerError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "dubboProviderError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "httpError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "httpClientError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "redisError": + sb.append(" by (serverIp,application,method,serverEnv,serverZone) "); + break; + case "dbError": + sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) "); + break; + case "grpcClientError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "grpcServerError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "thriftServerError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "thriftClientError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "apusServerError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "apusClientError": + sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) "); + break; + case "oracleError": + sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) "); + break; + case "elasticsearchClientError": + sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) "); + break; + + default: + sb.append(" by (serverIp,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv) "); + } + } + return sb.toString(); } @@ -519,6 +581,9 @@ public String completePromQL(String source, Map labels, String projectName, Stri } promQL.append(entry.getKey()); promQL.append("="); + if(StringUtils.isNotBlank(entry.getValue()) && entry.getValue().indexOf("|") > 0){ + promQL.append("~"); + } promQL.append("'"); promQL.append(entry.getValue()); promQL.append("'");