Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

optimize metric query for dubboProviderSLA #127

Merged
merged 1 commit into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public Result<PageData> querySumOverTime(@RequestBody PromQueryRangeParam param)
log.info("PrometheusController.queryRange request afterConvert Param startTime : {} ,endTime : {} ,step : {},projectName : {},metricSuffix : {}",startTime,endTime,step,projectName,metricSuffix);

String pDuration = duration + "s";
return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration);
return prometheusService.queryRangeSumOverTime(param.getMetric(),param.getLabels(),projectName, metricSuffix.name(),startTime,endTime,step,pDuration,param.getSumBy());
}

@ResponseBody
Expand Down Expand Up @@ -156,7 +156,7 @@ public Result<PageData> queryIncrease(@RequestBody PromQueryRangeParam param){

String pDuration = duration + "s";

Result<PageData> pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration);
Result<PageData> pageDataResult = prometheusService.queryRangeSumOverTime(param.getMetric(), param.getLabels(), projectName, metricSuffix.name(), startTime, endTime, step, pDuration,param.getSumBy());


return pageDataResult;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ public class PromQueryRangeParam implements Serializable {
Long startTime;
Long endTime;
Long step;
String sumBy;
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@
public enum AlarmCheckDataCount {

zero("0","立即触发"),
one("1","持续30s"),
two("2","持续60s"),
three("3","持续90s"),
five("5","持续150s");
one("1","持续20s"),
two("2","持续40s"),
three("3","持续60s"),
five("5","持续100s"),
six("6","持续120s"),
seven("7","持续140s"),
eight("8","持续160s"),
nine("9","持续180s"),
fifteen("15","持续5m");

private String code;
private String message;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,57 +49,57 @@ public void computByMetricType(AppMonitorRequest param, String appName, MetricKi
case http:

// http请求异常统计
Result<PageData> httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpError.getCode(),getLable(MetricKind.MetricType.http_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpExceptionNum(countRecordMetric(httpExceptions));

// httpClient请求异常统计
Result<PageData> httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpClientExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.httpClientError.getCode(), getLable(MetricKind.MetricType.http_client_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpClientExceptionNum(countRecordMetric(httpClientExceptions));

// http请求慢查询统计
Result<PageData> httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpSlowQuery = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpSlowQuery.getCode(),getLable(MetricKind.MetricType.http_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpSlowNum(countRecordMetric(httpSlowQuery));

// httpClient请求慢查询统计
Result<PageData> httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> httpClientSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.httpClientSlowQuery.getCode(), getLable(MetricKind.MetricType.http_client_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.httpClientSlowNum(countRecordMetric(httpClientSlowQuerys));

break;

case dubbo:

// dubbo请求异常统计
Result<PageData> dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboConsumerError.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboExceptionNum(countRecordMetric(dubboExceptions));
// dubbo provider请求异常统计
Result<PageData> dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboPExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dubboProvider.getCode(), getLable(MetricKind.MetricType.dubbo_provider_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboPExceptionNum(countRecordMetric(dubboPExceptions));
// dubbo consumer慢请求统计
Result<PageData> dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboConsumerSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboConsumerSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_consumer_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboCSlowQueryNum(countRecordMetric(dubboConsumerSlowQuerys));
log.info("projectName:{},dubboConsumerSlowQuerys:{}",appName,dubboConsumerSlowQuerys);
// dubbo provider慢请求统计
Result<PageData> dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> dubboProviderSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dubboProviderSlowQuery.getCode(), getLable(MetricKind.MetricType.dubbo_provider_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.dubboProviderSlowQueryNum(countRecordMetric(dubboProviderSlowQuerys));
log.info("projectName:{},dubboProviderSlowQuerys:{}",appName,dubboProviderSlowQuerys);

break;

case db :
// mysql请求异常统计
Result<PageData> sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> sqlExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.dbError.getCode(), getLable(MetricKind.MetricType.db_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.sqlExceptionNum(countRecordMetric(sqlExceptions));
// mysql慢请求统计
Result<PageData> sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> sqlSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.dbSlowQuery.getCode(), getLable(MetricKind.MetricType.db_slow_query, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.sqlSlowQueryNum(countRecordMetric(sqlSlowQuerys));
break;

case redis :
// redis请求异常统计
Result<PageData> redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> redisExceptions = prometheusService.queryRangeSumOverTime(ReqErrorMetrics.redisError.getCode(), getLable(MetricKind.MetricType.redis_exception, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.redisExceptionNum(countRecordMetric(redisExceptions));

Result<PageData> redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion);
Result<PageData> redisSlowQuerys = prometheusService.queryRangeSumOverTime(ReqSlowMetrics.redisSlow.getCode(), getLable(MetricKind.MetricType.redis_slow, curMetricType, param), appName, MetricSuffix._total.name(), startTime, endTime, step, timeDurarion,null);
dataBuilder.redisSlowNum(countRecordMetric(redisSlowQuerys));
break;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.xiaomi.mone.monitor.service.model.prometheus;

import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.Data;

import java.io.Serializable;
Expand All @@ -10,6 +11,7 @@
* @date 2021/8/16 11:42 上午
*/
@Data
@JsonInclude(JsonInclude.Include.NON_NULL)
public class Metric implements Serializable {

private String application;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ public Result<PageData> queryRange(String metric_, Map labels, String projectNam
}


public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration) {
public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String projectName, String metricSuffix, Long startTime, Long endTime, Long step, String duration,String sumBy) {

String offset = null;
Long offsetLong = System.currentTimeMillis() / 1000 - endTime;
Expand All @@ -133,9 +133,9 @@ public Result<PageData> queryRangeSumOverTime(String metric_, Map labels, String
endTime = System.currentTimeMillis() / 1000;

// 指标名称拼接
String metric = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset);
String metricSource = completePromQL(metric_, labels, projectName, metricSuffix, null, 0, duration, offset);

String sumOverTimeFunc = sumSumOverTimeFunc(metric);
String sumOverTimeFunc = sumSumOverTimeFunc(metricSource,metric_,sumBy);
log.info("PrometheusService.queryRangeSumOverTime sumOverTimeFunc : {} ", sumOverTimeFunc);

Map<String, Object> map = new HashMap<>();
Expand Down Expand Up @@ -344,7 +344,13 @@ private List<TeslaMetric> convertTeslaMetric(List<TeslaMetricDataSet> result) {
private List<Metric> convertValidMetric(List<MetricDataSetVector> result) {
List<Metric> list = new ArrayList<>();
if (!CollectionUtils.isEmpty(result)) {
int count = 0;
for (MetricDataSetVector metricDataVector : result) {

if(count > 1000){
break;
}

Metric metric = metricDataVector.getMetric();
if (Double.valueOf(metricDataVector.getValue().get(1)) == 0d) {
continue;
Expand All @@ -362,6 +368,7 @@ private List<Metric> convertValidMetric(List<MetricDataSetVector> result) {
}

list.add(metric);
count++;

}
}
Expand Down Expand Up @@ -440,12 +447,67 @@ private String sumOverTimeFunc(String source) {
* @param source
* @return
*/
private String sumSumOverTimeFunc(String source) {
private String sumSumOverTimeFunc(String source,String metric,String sumBy) {

StringBuilder sb = new StringBuilder();
sb.append("sum(sum_over_time(");
sb.append(source);
sb.append(")) by (serverIp,job,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv,clientIp) ");
sb.append(")) ");
if (StringUtils.isNotBlank(sumBy)) {
sb.append(" by (").append(sumBy).append( ")");
}else {
switch (metric) {
case "dubboProviderSLAError":
sb.append(" by (application,methodName,serviceName,serverEnv,serverZone,clientProjectName,clientEnv) ");
break;
case "dubboConsumerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "dubboProviderError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "httpError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "httpClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "redisError":
sb.append(" by (serverIp,application,method,serverEnv,serverZone) ");
break;
case "dbError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;
case "grpcClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "grpcServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "thriftServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "thriftClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "apusServerError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "apusClientError":
sb.append(" by (serverIp,application,methodName,serviceName,serverEnv,serverZone) ");
break;
case "oracleError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;
case "elasticsearchClientError":
sb.append(" by (serverIp,application,dataSource,sqlMethod,sql,serverEnv,serverZone) ");
break;

default:
sb.append(" by (serverIp,application,methodName,serviceName,dataSource,sqlMethod,sql,serverEnv,serverZone,containerName,method,clientProjectId,clientProjectName,clientEnv) ");
}
}

return sb.toString();
}

Expand Down Expand Up @@ -519,6 +581,9 @@ public String completePromQL(String source, Map labels, String projectName, Stri
}
promQL.append(entry.getKey());
promQL.append("=");
if(StringUtils.isNotBlank(entry.getValue()) && entry.getValue().indexOf("|") > 0){
promQL.append("~");
}
promQL.append("'");
promQL.append(entry.getValue());
promQL.append("'");
Expand Down