Skip to content

Commit

Permalink
optimize metric query for dubboProviderSLA (apache#127)
Browse files Browse the repository at this point in the history
  • Loading branch information
gaoxh authored and shanwb committed Nov 29, 2023
1 parent 40da24a commit 0a821e6
Show file tree
Hide file tree
Showing 6 changed files with 96 additions and 23 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public Result<PageData> querySumOverTime(@RequestBody PromQueryRangeParam param)
log.info("PrometheusController.queryRange request afterConvert Param startTime : {} ,endTime : {} ,step : {},projectName : {},metricSuffix : {}",startTime,endTime,step,projectName,metricSuffix);

String pDuration = duration + "s";
return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration);
return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration,param.getSumBy());
}

@ResponseBody
Expand Down Expand Up @@ -156,7 +156,7 @@ public Result<PageData> queryIncrease(@RequestBody PromQueryRangeParam param){

String pDuration = duration + "s";

Result<PageData> pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration);
Result<PageData> pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration,param.getSumBy());


return pageDataResult;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ public class PromQueryRangeParam implements Serializable {
Long startTime;
Long endTime;
Long step;
String sumBy;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
public enum AlarmCheckDataCount {

zero("0","立即触发"),
one("1","持续30s"),
two("2","持续60s"),
three("3","持续90s"),
five("5","持续150s");
one("1","持续20s"),
two("2","持续40s"),
three("3","持续60s"),
five("5","持续100s"),
six("6","持续120s"),
seven("7","持续140s"),
eight("8","持续160s"),
nine("9","持续180s"),
fifteen("15","持续5m");

private String code;
private String message;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,57 +49,57 @@ public void computByMetricType(AppMonitorRequest param, String appName, MetricKi
case http:

// http请求异常统计
Result<PageData> httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpExceptionNum(countRecordMetric(httpExceptions));

// httpClient请求异常统计
Result<PageData> httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpClientExceptionNum(countRecordMetric(httpClientExceptions));

// http请求慢查询统计
Result<PageData> httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpSlowNum(countRecordMetric(httpSlowQuery));

// httpClient请求慢查询统计
Result<PageData> httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpClientSlowNum(countRecordMetric(httpClientSlowQuerys));

break;

case dubbo:

// dubbo请求异常统计
Result<PageData> dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboExceptionNum(countRecordMetric(dubboExceptions));
// dubbo请求异常统计
Result<PageData> dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboPExceptionNum(countRecordMetric(dubboPExceptions));
// dubbo consumer慢请求统计
Result<PageData> dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboCSlowQueryNum(countRecordMetric(dubboConsumerSlowQuerys));
log.info("projectName:{},dubboConsumerSlowQuerys:{}",appName,dubboConsumerSlowQuerys);
// dubbo provider慢请求统计
Result<PageData> dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboProviderSlowQueryNum(countRecordMetric(dubboProviderSlowQuerys));
log.info("projectName:{},dubboProviderSlowQuerys:{}",appName,dubboProviderSlowQuerys);

break;

case db :
// mysql请求异常统计
Result<PageData> sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.sqlExceptionNum(countRecordMetric(sqlExceptions));
// mysql慢请求统计
Result<PageData> sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.sqlSlowQueryNum(countRecordMetric(sqlSlowQuerys));
break;

case redis :
// redis请求异常统计
Result<PageData> redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.redisExceptionNum(countRecordMetric(redisExceptions));

Result<PageData> redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.redisSlowNum(countRecordMetric(redisSlowQuerys));
break;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.xiaomi.mone.monitor.service.model.prometheus;

import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.Data;

import java.io.Serializable;
Expand All @@ -10,6 +11,7 @@
* @date 2021/8/16 11:42 上午
*/
@Data
@JsonInclude(JsonInclude.Include.NON_NULL)
public class Metric implements Serializable {

private String application;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public Result<PageData> queryRange(String metric_, Map labels, String projectNam
}


public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration) {
public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration,String sumBy) {

String offset = null;
Long offsetLong = System.currentTimeMillis() / 1000 - endTime;
Expand All @@ -133,9 +133,9 @@ public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String
endTime = System.currentTimeMillis() / 1000;

// 指标名称拼接
String metric = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset);
String metricSource = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset);

String sumOverTimeFunc = sumSumOverTimeFunc(metric);
String sumOverTimeFunc = sumSumOverTimeFunc(metricSource,metric_,sumBy);
log.info("PrometheusService.queryRangeSumOverTime sumOverTimeFunc : {} ", sumOverTimeFunc);

Map<String, Object> map = new HashMap<>();
Expand Down Expand Up @@ -344,7 +344,13 @@ private List<TeslaMetric> convertTeslaMetric(List<TeslaMetricDataSet> result) {
private List<Metric> convertValidMetric(List<MetricDataSetVector> result) {
List<Metric> list = new ArrayList<>();
if (!CollectionUtils.isEmpty(result)) {
int count = 0;
for (MetricDataSetVector metricDataVector : result) {

if(count > 1000){
break;
}

Metric metric = metricDataVector.getMetric();
if (Double.valueOf(metricDataVector.getValue().get(1)) == 0d) {
continue;
Expand All @@ -362,6 +368,7 @@ private List<Metric> convertValidMetric(List<MetricDataSetVector> result) {
}

list.add(metric);
count++;

}
}
Expand Down Expand Up @@ -440,12 +447,67 @@ private String sumOverTimeFunc(String source) {
* @param source
* @return
*/
private String sumSumOverTimeFunc(String source) {
private String sumSumOverTimeFunc(String source,String metric,String sumBy) {

StringBuilder sb = new StringBuilder();
sb.append("sum(sum_over_time(");
sb.append(source);
sb.append(")) by (serverIp,job,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv,clientIp) ");
sb.append(")) ");
if (StringUtils.isNotBlank(sumBy)) {
sb.append(" by (").append(sumBy).append( ")");
}else {
switch (metric) {
case "dubboProviderSLAError":
sb.append(" by (application,methodName,serviceName,serverEnv,serverZone,clientProjectName,clientEnv) ");
break;
case "dubboConsumerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "dubboProviderError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "httpError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "httpClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "redisError":
sb.append(" by (serverIp,application,method,serverEnv,serverZone) ");
break;
case "dbError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;
case "grpcClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "grpcServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "thriftServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "thriftClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "apusServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "apusClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "oracleError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;
case "elasticsearchClientError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;

default:
sb.append(" by (serverIp,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv) ");
}
}

return sb.toString();
}

Expand Down Expand Up @@ -519,6 +581,9 @@ public String completePromQL(String source, Map labels, String projectName, Stri
}
promQL.append(entry.getKey());
promQL.append("=");
if(StringUtils.isNotBlank(entry.getValue()) && entry.getValue().indexOf("|") > 0){
promQL.append("~");
}
promQL.append("'");
promQL.append(entry.getValue());
promQL.append("'");
Expand Down

0 comments on commit 0a821e6

Please sign in to comment.