regression_report_v2.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
#  Licensed to the Apache Software Foundation (ASF) under one
#  or more contributor license agreements.  See the NOTICE file
#  distributed with this work for additional information
#  regarding copyright ownership.  The ASF licenses this file
#  to you under the Apache License, Version 2.0 (the
#  "License"); you may not use this file except in compliance
#  with the License.  You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
#  Unless required by applicable law or agreed to in writing, software
#  distributed under the License is distributed on an "AS IS" BASIS,
#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#  See the License for the specific language governing permissions and
# limitations under the License.
################################################################################
import argparse
import json
import urllib
import urllib2

from regression_report import loadBenchmarkNames

"""
The regression detection algorithm calculates the regression ratio as the relative change between the current
throughput (the best of the 3 most recent samples) and the baseline throughput, which is the maximum sustained
throughput (the best minimum over 3 consecutive samples) observed in the most recent numBaselineSamples samples.
A regression alert is triggered if the regression ratio exceeds
max(minRegressionRatio, minInstabilityMultiplier * lastStandardDeviation / currentThroughput).

Please refer to https://docs.google.com/document/d/1Bvzvq79Ll5yxd1UtC0YzczgFbZPAgPcN3cI0MjVkIag for more detail.
"""

ENVIRONMENT = 2
MIN_SAMPLE_SIZE_LIMIT = 5

"""
Returns a tuple of (list of benchmark results, flag telling whether less is better for this benchmark)
"""
def loadHistoryData(codespeedUrl, exe, benchmark, baselineSize):
    """
    Fetch the timeline of one benchmark from codespeed.

    :param codespeedUrl: base url of the codespeed instance; 'timeline/json/?' is appended verbatim,
                         so it has to end with a slash
    :param exe: executable identifier, sent as the 'exe' query parameter and used as the key into the
                per-branch results of the response
    :param benchmark: benchmark name, sent as the 'ben' query parameter
    :param baselineSize: number of revisions to request, sent as the 'revs' query parameter
    :return: tuple (results, lessIsBetter) where results is the 'master' branch entry of the first timeline
             for exe and lessIsBetter is True when the timeline is flagged as " (less is better)"
    """
    url = codespeedUrl + 'timeline/json/?' + urllib.urlencode(
        {'exe': exe, 'ben': benchmark, 'env': ENVIRONMENT, 'revs': baselineSize})
    f = urllib2.urlopen(url)
    try:
        response = f.read()
    finally:
        # Release the connection even when reading the body fails.
        f.close()
    # Only the first timeline of the response is used.
    timelines = json.loads(response)['timelines'][0]
    result = timelines['branches']['master'][exe]
    lessIsBetter = (timelines['lessisbetter'] == " (less is better)")
    return result, lessIsBetter

def detectRegression(urlToBenchmark, stds, scores, baselineSize, minRegressionRatio, minInstabilityMultiplier,
                     direction, benchmark=None):
    """
    Print an alert line when the most recent scores regressed against the baseline.

    The baseline is the maximum over the minimums of every 3 consecutive scores (taken from the head of
    scores), the current value is the maximum of the last 3 scores. An alert is printed when
    direction * (1 - current / baseline) exceeds
    max(minRegressionRatio, direction * minInstabilityMultiplier * stds[-1] / current).

    :param urlToBenchmark: link printed in the alert
    :param stds: standard deviations aligned with scores; only the last one is used
    :param scores: scores with the tail treated as the current samples; callers negate them (and pass
                   direction=-1) when less is better
    :param baselineSize: upper bound (exclusive) for the end index of the 3-sample baseline windows
    :param minRegressionRatio: minimal relative change that is reported
    :param minInstabilityMultiplier: multiplier applied to the relative deviation of the last sample
    :param direction: 1 when higher scores are better, -1 when scores were negated by the caller
    :param benchmark: name shown as the link text of the alert; defaults to urlToBenchmark when omitted
    :return: None, the alert is written to stdout
    """
    sustainable_x = [min(scores[i - 3: i]) for i in range(3, min(len(scores), baselineSize))]
    if not sustainable_x or not stds:
        # Not a single complete 3-sample window (or no deviation): there is nothing to compare against.
        return

    baseline_throughput = max(sustainable_x)
    current_throughput = max(scores[-3:])
    if baseline_throughput == 0:
        # A relative change against a zero baseline is undefined.
        return

    # float() keeps integer scores from being truncated by Python 2 integer division.
    if current_throughput != 0:
        current_instability = float(stds[-1]) / current_throughput
    else:
        # The relative deviation is undefined for a zero score; fall back to minRegressionRatio only.
        current_instability = 0.0

    regression_ratio = direction * (1 - float(current_throughput) / baseline_throughput)
    threshold = max(minRegressionRatio, direction * minInstabilityMultiplier * current_instability)
    if regression_ratio > threshold:
        label = urlToBenchmark if benchmark is None else benchmark
        # Single-argument print(...) behaves identically as a Python 2 statement and a Python 3 function.
        print("<%s|%s> baseline=%s current_value=%s" % (
            urlToBenchmark, label, direction * baseline_throughput, direction * current_throughput))


def checkBenchmark(args, exe, benchmark):
    """
    Load the recent history of one benchmark and report a regression if there is one.

    :param args: parsed command line arguments; reads codespeedUrl, numBaselineSamples, numDisplaySamples,
                 minRegressionRatio and minInstabilityMultiplier
    :param exe: executable identifier passed to codespeed
    :param benchmark: benchmark name passed to codespeed and shown in the alert
    :return: None
    """
    # 3 additional samples are requested on top of the baseline size.
    results, lessIsbBetter = loadHistoryData(args.codespeedUrl, exe, benchmark, args.numBaselineSamples + 3)
    if len(results) < MIN_SAMPLE_SIZE_LIMIT:
        return

    # detectRegression treats the tail as the current samples
    # NOTE(review): assumes codespeed returns the newest sample first - confirm.
    results = list(reversed(results))
    scores = [score for (date, score, deviation, commit, branch) in results]
    stds = [deviation for (date, score, deviation, commit, branch) in results]

    urlToBenchmark = args.codespeedUrl + 'timeline/#/?' + urllib.urlencode({
        'ben': benchmark,
        'exe': exe,
        'env': ENVIRONMENT,
        'revs': args.numDisplaySamples,
        'equid': 'off',
        'quarts': 'on',
        'extr': 'on'})

    direction = 1
    if lessIsbBetter:
        # Negating the scores lets detectRegression handle both cases as "higher is better".
        scores = [-1 * score for score in scores]
        direction = -1
    detectRegression(urlToBenchmark, stds, scores, args.numBaselineSamples, args.minRegressionRatio,
                     args.minInstabilityMultiplier, direction, benchmark)


if __name__ == "__main__":
    # Command line options as (flag, add_argument keyword arguments), listed in the order they are registered.
    optionSpecs = [
        ('--num-baseline-samples', {
            'dest': 'numBaselineSamples',
            'required': False,
            'default': 30,
            'type': int,
            'help': 'The maximum number of recent samples across which the maximum achieved throughput would be '
                    'used as the baseline for regression detection.',
        }),
        ('--num-display-samples', {
            'dest': 'numDisplaySamples',
            'required': False,
            'default': 200,
            'type': int,
            'help': 'Number of samples to display in regression report for human inspection. Not all values '
                    'are working.',
        }),
        ('--min-regression-ratio', {
            'dest': 'minRegressionRatio',
            'required': False,
            'default': 0.04,
            'type': float,
            'help': 'A regression should be alerted only if the ratio of change between the baseline '
                    'throughput and the current throughput exceeds the configured value.',
        }),
        ('--min-instability-multiplier', {
            'dest': 'minInstabilityMultiplier',
            'required': False,
            'default': 2,
            'type': float,
            'help': "Min instability multiplier to measure deviation.",
        }),
        ('--codespeed-url', {
            'dest': 'codespeedUrl',
            'default': "http://codespeed.dak8s.net:8000/",
            'help': 'The codespeed url.',
        }),
    ]

    parser = argparse.ArgumentParser(description='Regression report based on Max/Min value')
    for flag, spec in optionSpecs:
        parser.add_argument(flag, **spec)
    args = parser.parse_args()

    # Run the regression check for every (executable, benchmark) pair reported by loadBenchmarkNames.
    execToBenchmarks = loadBenchmarkNames(args.codespeedUrl)
    for exe, benchmarks in execToBenchmarks.items():
        for benchmark in benchmarks:
            checkBenchmark(args, exe, benchmark)