From ca016f17ac58e0138acc2aa6d0e63bdf7845213f Mon Sep 17 00:00:00 2001 From: Madhanraj Maraimani Date: Fri, 29 Apr 2022 16:23:29 +0100 Subject: [PATCH] Don't fail if benchmark performs better irrespective of threshold config There might be situations if the changes drastically improves the performance or the thresholds are configured with lower values it lead to failure of the action whereas it is not. i.e if the alert and failure threshold are configured as 0.1 and 0.2 and the previous and current values are bigger, then the existing check would consider it as failure So added a check to ensure the failure is calculated based on the performance and threshold rather than threshold alone --- src/write.ts | 14 +++++++++++--- test/write.spec.ts | 25 +++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/write.ts b/src/write.ts index 775b23e27..484016e85 100644 --- a/src/write.ts +++ b/src/write.ts @@ -85,6 +85,7 @@ interface Alert { current: BenchmarkResult; prev: BenchmarkResult; ratio: number; + isPerformanceBetter: boolean; } function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number): Alert[] { @@ -98,7 +99,9 @@ function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number continue; } - const ratio = biggerIsBetter(curSuite.tool) + const isBiggerBetter = biggerIsBetter(curSuite.tool); + const isPerformanceBetter = isBiggerBetter ? current.value > prev.value : current.value < prev.value; + const ratio = isBiggerBetter ? prev.value / current.value // e.g. current=100, prev=200 : current.value / prev.value; // e.g. current=200, prev=100 @@ -107,7 +110,7 @@ function findAlerts(curSuite: Benchmark, prevSuite: Benchmark, threshold: number `Performance alert! Previous value was ${prev.value} and current value is ${current.value}.` + ` It is ${ratio}x worse than previous exceeding a ratio threshold ${threshold}`, ); - alerts.push({ current, prev, ratio }); + alerts.push({ current, prev, ratio, isPerformanceBetter }); } } @@ -297,7 +300,12 @@ async function handleAlert(benchName: string, curSuite: Benchmark, prevSuite: Be // Note: alertThreshold is smaller than failThreshold. It was checked in config.ts const len = alerts.length; const threshold = floatStr(failThreshold); - const failures = alerts.filter((a) => a.ratio > failThreshold); + + //Only consider it as failure if the benchmark performed better irrespective of the alert threshold + // For ex if there is a massive performance improvement between commits + // or the alert threshold and failure threshold is lower the failure might be triggered. + // This check ensures that the check is failure only if the performance is not better + const failures = alerts.filter((a) => !a.isPerformanceBetter).filter((a) => a.ratio > failThreshold); if (failures.length > 0) { core.debug('Mark this workflow as fail since one or more fatal alerts found'); if (failThreshold !== alertThreshold) { diff --git a/test/write.spec.ts b/test/write.spec.ts index faa0231ca..d85ea0699 100644 --- a/test/write.spec.ts +++ b/test/write.spec.ts @@ -727,6 +727,31 @@ describe.each(['https://github.com', 'https://github.enterprise.corp'])('writeBe }, error: undefined, }, + { + it: 'does not raise an alert when the benchmark is better irrespective of the alertThreshold', + config: { ...defaultCfg, alertThreshold: 0.01, failThreshold: 0.01 }, + data: { + lastUpdate, + repoUrl, + entries: { + 'Test benchmark': [ + { + commit: commit('prev commit id'), + date: lastUpdate - 1000, + tool: 'go', + benches: [bench('bench_fib_10', 14)], + }, + ], + }, + }, + added: { + commit: commit('current commit id'), + date: lastUpdate, + tool: 'go', + benches: [bench('bench_fib_10', 11)], // Exceeds 1.0 alertthreshold + }, + error: undefined, + }, ]; for (const t of normalCases) {