diff --git a/src/write.ts b/src/write.ts index 775b23e27..484016e85 100644 --- a/src/write.ts +++ b/src/write.ts @@ -85,6 +85,7 @@ interface Alert { current: BenchmarkResult; prev: BenchmarkResult; ratio: number; + isPerformanceBetter: boolean; } function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number): Alert[] { @@ -98,7 +99,9 @@ function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number continue; } - const ratio = biggerIsBetter(curSuite.tool) + const isBiggerBetter = biggerIsBetter(curSuite.tool); + const isPerformanceBetter = isBiggerBetter ? current.value > prev.value : current.value < prev.value; + const ratio = isBiggerBetter ? prev.value / current.value // e.g. current=100, prev=200 : current.value / prev.value; // e.g. current=200, prev=100 @@ -107,7 +110,7 @@ function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number `Performance alert! Previous value was ${prev.value} and current value is ${current.value}.` + ` It is ${ratio}x worse than previous exceeding a ratio threshold ${threshold}`, ); - alerts.push({ current, prev, ratio }); + alerts.push({ current, prev, ratio, isPerformanceBetter }); } } @@ -297,7 +300,12 @@ async function handleAlert(benchName: string, curSuite: Benchmark, prevSuite: Be // Note: alertThreshold is smaller than failThreshold. It was checked in config.ts const len = alerts.length; const threshold = floatStr(failThreshold); - const failures = alerts.filter((a) => a.ratio > failThreshold); + + //Only consider it as failure if the benchmark performed better irrespective of the alert threshold + // For ex if there is a massive performance improvement between commits + // or the alert threshold and failure threshold is lower the failure might be triggered. + // This check ensures that the check is failure only if the performance is not better + const failures = alerts.filter((a) => !a.isPerformanceBetter).filter((a) => a.ratio > failThreshold); if (failures.length > 0) { core.debug('Mark this workflow as fail since one or more fatal alerts found'); if (failThreshold !== alertThreshold) { diff --git a/test/write.spec.ts b/test/write.spec.ts index faa0231ca..d85ea0699 100644 --- a/test/write.spec.ts +++ b/test/write.spec.ts @@ -727,6 +727,31 @@ describe.each(['https://github.com', 'https://github.enterprise.corp'])('writeBe }, error: undefined, }, + { + it: 'does not raise an alert when the benchmark is better irrespective of the alertThreshold', + config: { ...defaultCfg, alertThreshold: 0.01, failThreshold: 0.01 }, + data: { + lastUpdate, + repoUrl, + entries: { + 'Test benchmark': [ + { + commit: commit('prev commit id'), + date: lastUpdate - 1000, + tool: 'go', + benches: [bench('bench_fib_10', 14)], + }, + ], + }, + }, + added: { + commit: commit('current commit id'), + date: lastUpdate, + tool: 'go', + benches: [bench('bench_fib_10', 11)], // Exceeds 1.0 alertthreshold + }, + error: undefined, + }, ]; for (const t of normalCases) {