From ed74f9f3f753179566577e7ded281b0812dd4d73 Mon Sep 17 00:00:00 2001
From: brockhof
Date: Tue, 4 Feb 2020 10:41:02 +0100
Subject: [PATCH 1/4] changed usage of aRT back to ERT (wherever I found it), addressing #1935

---
 code-postprocessing/cocopp/bestalg.py         |  20 +-
 code-postprocessing/cocopp/captions.py        |  18 +-
 code-postprocessing/cocopp/cococommands.py    |   2 +-
 code-postprocessing/cocopp/comp2/ppfig2.py    |   6 +-
 .../cocopp/comp2/pprldistr2.py                |  19 +-
 code-postprocessing/cocopp/comp2/ppscatter.py |  20 +-
 code-postprocessing/cocopp/compall/ppfigs.py  |  10 +-
 .../cocopp/compall/ppperfprof.py              |   8 +-
 .../cocopp/compall/pprldmany.py               |   8 +-
 .../cocopp/compall/pptables.py                |  18 +-
 code-postprocessing/cocopp/firstsession.py    |   2 +-
 .../cocopp/latex_commands_for_html.html       | 286 +++++++++---------
 code-postprocessing/cocopp/ppfig.py           |  14 +-
 code-postprocessing/cocopp/ppfigdim.py        |  20 +-
 code-postprocessing/cocopp/ppfigparam.py      |  14 +-
 code-postprocessing/cocopp/pplogloss.py       | 134 ++++----
 code-postprocessing/cocopp/pprldistr.py       |   4 +-
 code-postprocessing/cocopp/pproc.py           |  34 +--
 code-postprocessing/cocopp/pptable.py         |  20 +-
 code-postprocessing/cocopp/pptex.py           |   4 +-
 .../cocopp/preparetexforhtml.py               |   6 +-
 code-postprocessing/cocopp/rungeneric1.py     |   8 +-
 code-postprocessing/cocopp/rungenericmany.py  |   4 +-
 code-postprocessing/cocopp/toolsstats.py      |   8 +-
 24 files changed, 352 insertions(+), 335 deletions(-)

diff --git a/code-postprocessing/cocopp/bestalg.py b/code-postprocessing/cocopp/bestalg.py
index 949bd2227..0897ef634 100644
--- a/code-postprocessing/cocopp/bestalg.py
+++ b/code-postprocessing/cocopp/bestalg.py
@@ -88,7 +88,7 @@ class BestAlgSet(DataSet):
     numbers of function evaluations for evals or function values for
     funvals.
 
-    Known bug: algorithms where the aRT is NaN or Inf are not taken into
+    Known bug: algorithms where the ERT is NaN or Inf are not taken into
     account!?
 
     """
@@ -148,7 +148,7 @@ def __init__(self, dict_alg, algId='Virtual Best Algorithm'):
         sortedAlgs = list(dict_alg.keys())
         # algorithms will be sorted along sortedAlgs which is now a fixed list
 
-        # Align aRT
+        # Align ERT
         erts = list(np.transpose(np.vstack([dict_alg[i].target, dict_alg[i].ert]))
                     for i in sortedAlgs)
         res = readalign.alignArrayData(readalign.HArrayMultiReader(erts))
@@ -168,7 +168,7 @@ def __init__(self, dict_alg, algId='Virtual Best Algorithm'):
                 continue  # TODO: don't disregard these entries
             if tmpert == currentbestert:
                 # TODO: what do we do in case of ties?
-                # look at function values corresponding to the aRT?
+                # look at function values corresponding to the ERT?
                 # Look at the function evaluations? the success ratio?
                 pass
             elif tmpert < currentbestert:
@@ -308,11 +308,11 @@ def createDictInstance(self):
         return dictinstance
 
     def detERT(self, targets):
-        """Determine the average running time to reach target values.
+        """Determine the expected running time to reach target values.
 
         :keyword list targets: target function values of interest
 
-        :returns: list of average running times corresponding to the
+        :returns: list of expected running times corresponding to the
                   targets.
 
         """
@@ -644,7 +644,7 @@ def getAllContributingAlgorithmsToBest(algnamelist, target_lb=1e-8,
                                        target_ub=1e2):
     """Computes first the artificial best algorithm from given algorithm list
     algnamelist, constructed by extracting for each target/function pair
-    thalgorithm with best aRT among the given ones. Returns then the list
+    the algorithm with best ERT among the given ones.
Returns then the list of algorithms that are contributing to the definition of the best algorithm, separated by dimension, and sorted by importance (i.e. with respect to the number of target/function pairs where each algorithm is @@ -713,9 +713,9 @@ def extractBestAlgorithms(args=algs2009, f_factor=2, """Returns (and prints) per dimension a list of algorithms within algorithm list args that contains an algorithm if for any dimension/target/function pair this algorithm: - - is the best algorithm wrt aRT - - its own aRT lies within a factor f_factor of the best aRT - - there is no algorithm within a factor of f_factor of the best aRT + - is the best algorithm wrt ERT + - its own ERT lies within a factor f_factor of the best ERT + - there is no algorithm within a factor of f_factor of the best ERT and the current algorithm is the second best. """ @@ -750,7 +750,7 @@ def extractBestAlgorithms(args=algs2009, f_factor=2, # add best for this target: selectedAlgsPerProblemDF.append(best.algs[i]) - # add second best or all algorithms that have an aRT + # add second best or all algorithms that have an ERT # within a factor of f_factor of the best: secondbest_ERT = np.infty secondbest_str = '' diff --git a/code-postprocessing/cocopp/captions.py b/code-postprocessing/cocopp/captions.py index b374011d4..a375e8a9d 100644 --- a/code-postprocessing/cocopp/captions.py +++ b/code-postprocessing/cocopp/captions.py @@ -81,7 +81,7 @@ def get_reference_algorithm_text(best_algorithm_mandatory=True): return text -def get_best_art_text(): +def get_best_ert_text(): text = '' testbed = testbedsettings.current_testbed if testbed.reference_algorithm_filename: @@ -90,19 +90,19 @@ def get_best_art_text(): or testbed.name == testbedsettings.testbed_name_bi): if testbed.reference_algorithm_displayname: if "best 2009" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2009" + text = "best \\ERT\ measured during BBOB-2009" elif "best 2010" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2010" + text = "best \\ERT\ measured during BBOB-2010" elif "best 2012" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2012" + text = "best \\ERT\ measured during BBOB-2012" elif "best 2013" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2013" + text = "best \\ERT\ measured during BBOB-2013" elif "best 2016" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2016" + text = "best \\ERT\ measured during BBOB-2016" elif "best 2009-16" in testbed.reference_algorithm_displayname: - text = "best \\aRT\ measured during BBOB-2009-16" + text = "best \\ERT\ measured during BBOB-2009-16" else: - text = "the \\aRT\ of the reference algorithm" + text = "the \\ERT\ of the reference algorithm" else: raise NotImplementedError('reference algorithm not supported for this testbed') else: @@ -161,7 +161,7 @@ def get_light_brown_line_text(testbedname): '!!NUM-OF-TARGETS-IN-ECDF!!': lambda: str(len(testbedsettings.current_testbed.pprldmany_target_values)), '!!TARGET-RANGES-IN-ECDF!!': lambda: str(testbedsettings.current_testbed.pprldmany_target_range_latex), '!!TOTAL-NUM-OF-FUNCTIONS!!': lambda: str(testbedsettings.current_testbed.last_function_number - testbedsettings.current_testbed.first_function_number + 1), - '!!BEST-ART!!': lambda: get_best_art_text(), + '!!BEST-ERT!!': lambda: get_best_ert_text(), '!!NBTARGETS-SCATTER!!': lambda: 
str(len(testbedsettings.current_testbed.ppscatter_target_values)),
     '!!NBLOW!!': lambda: get_nblow(),
     '!!NBUP!!': lambda: get_nbup()
diff --git a/code-postprocessing/cocopp/cococommands.py b/code-postprocessing/cocopp/cococommands.py
index 58c201399..3c68107f5 100644
--- a/code-postprocessing/cocopp/cococommands.py
+++ b/code-postprocessing/cocopp/cococommands.py
@@ -47,7 +47,7 @@
 Number of trials: 15
 Final target Df: 1e-08
 min / max number of evals per trial: 224 / 333
-   evals/DIM:  best     15%     50%     85%     max |  aRT/DIM  nsucc
+   evals/DIM:  best     15%     50%     85%     max |  ERT/DIM  nsucc
  ---Df---|-----------------------------------------|----------------
  1.0e+03 |     0       0       0       0       0 |     0.5  15
  1.0e+01 |     0       0       2       8      10 |     2.9  15
diff --git a/code-postprocessing/cocopp/comp2/ppfig2.py b/code-postprocessing/cocopp/comp2/ppfig2.py
index ea85b24ee..ff4fcad80 100644
--- a/code-postprocessing/cocopp/comp2/ppfig2.py
+++ b/code-postprocessing/cocopp/comp2/ppfig2.py
@@ -1,7 +1,7 @@
 #! /usr/bin/env python
 # -*- coding: utf-8 -*-
 
-"""Creates aRT-ratio comparison figures (ECDF) and convergence figures for the comparison of 2 algorithms.
+"""Creates ERT-ratio comparison figures (ECDF) and convergence figures for the comparison of 2 algorithms.
 
 Scale up figures for two algorithms can be done with compall/ppfigs.py
 
@@ -130,7 +130,7 @@ def beautify(xmin=None):
 
     # Annotate figure
     ax.set_xlabel('log10(Delta ftarget)')
-    ax.set_ylabel(r'log10(aRT1/aRT0) or ~#succ')  # TODO: replace hard-coded 15
+    ax.set_ylabel(r'log10(ERT1/ERT0) or ~#succ')  # TODO: replace hard-coded 15
     ax.grid(True)
 
     #Tick label handling
@@ -243,7 +243,7 @@ def annotate(entry0, entry1, dim, minfvalue=1e-8, nbtests=1):
              transform=trans, clip_on=False)
 
 def main(dsList0, dsList1, minfvalue=1e-8, outputdir=''):
-    """Returns aRT1/aRT0 comparison figure."""
+    """Returns ERT1/ERT0 comparison figure."""
 
     #plt.rc("axes", labelsize=20, titlesize=24)
     #plt.rc("xtick", labelsize=20)
diff --git a/code-postprocessing/cocopp/comp2/pprldistr2.py b/code-postprocessing/cocopp/comp2/pprldistr2.py
index 076b9b19e..176fa2775 100644
--- a/code-postprocessing/cocopp/comp2/pprldistr2.py
+++ b/code-postprocessing/cocopp/comp2/pprldistr2.py
@@ -38,7 +38,7 @@ def beautify(handles):
     axisHandle = plt.gca()
     axisHandle.set_xscale('log')
-    plt.axvline(1, ls='-', color='k');  # symmetry line for aRT1/aRT0 = 1
+    plt.axvline(1, ls='-', color='k');  # symmetry line for ERT1/ERT0 = 1
     xlim = min(max(numpy.abs(numpy.log10(plt.xlim()))),
                numpy.ceil(numpy.log10(sys.float_info.max))-1)  # correction of
                                                                # numerical issues
@@ -60,6 +60,12 @@ def beautify(handles):
             newxticks.append('%d' % round(numpy.log10(i)))
     axisHandle.set_xticklabels(newxticks)
 
+    print('......................')
+    print('old xticks: ')
+    print(xticks)
+    print('new xticks: ')
+    print(newxticks)
+
     # Prolong to the boundary...
     xmin, xmax = plt.xlim()
     for i in handles:
@@ -82,11 +88,22 @@ def beautify(handles):
 
     # Inverted xticks
     x = axisHandle.get_xticks()
+
+    print('xticks old:')
+    print(x)
+
     # Operation for reverting the ticks for x < 1
     x[x<1] = sorted(1/(x[x<1]*numpy.power(10, -2*numpy.floor(numpy.log10(x[x<1]))-1)))
     x = x[(x<xmax) * (x>xmin)]  # why?
axisHandle.set_xticks(x)
+    print('xticks new:')
+    print(x)
+
+    assert len(x) == len(newxticks), \
+        'beautify: mismatch between xtick positions and labels'
+
 
 def computeERT(fevals, maxevals):
     data = fevals.copy()
     success = (numpy.isnan(data)==False)
diff --git a/code-postprocessing/cocopp/comp2/ppscatter.py b/code-postprocessing/cocopp/comp2/ppscatter.py
index af4d22183..5b9c80144 100644
--- a/code-postprocessing/cocopp/comp2/ppscatter.py
+++ b/code-postprocessing/cocopp/comp2/ppscatter.py
@@ -2,9 +2,9 @@
 # -*- coding: utf-8 -*-
 """Scatter Plots.
 
-For two algorithms, this generates the scatter plot of log(aRT1(df)) vs.
-log(aRT0(df)), where aRT0(df) is the aRT of the reference algorithm,
-aRT1(df) is the aRT of the algorithm of concern, both for target
+For two algorithms, this generates the scatter plot of log(ERT1(df)) vs.
+log(ERT0(df)), where ERT0(df) is the ERT of the reference algorithm,
+ERT1(df) is the ERT of the algorithm of concern, both for target
 precision df.
 
 Different symbols are used for different dimension (see
@@ -20,8 +20,8 @@
 """
 from __future__ import absolute_import
 
-"""For two algorithms, aRTs(given target function value) can also be
-plotted in a scatter plot (log(aRT0) vs. log(aRT1)), which results in a
+"""For two algorithms, ERTs (given target function value) can also be
+plotted in a scatter plot (log(ERT0) vs. log(ERT1)), which results in a
 very attractive presentation, see the slides of Frank Hutter at
 http://www.msr-inria.inria.fr/events-news/first-search-biology-day. The
 advantage is that the absolute values do not get lost. The disadvantage
@@ -57,14 +57,14 @@
 
 def prepare_figure_caption():
 
-    caption_start_fixed = r"""Average running time (\aRT\ in $\log_{10}$ of number of function evaluations)
+    caption_start_fixed = r"""Expected running time (\ERT\ in $\log_{10}$ of number of function evaluations)
                     of \algorithmA\ ($y$-axis) versus \algorithmB\ ($x$-axis) for $!!NBTARGETS-SCATTER!!$ target values
                     $!!DF!! \in [!!NBLOW!!, !!NBUP!!]$ in each dimension on functions #1. """
 
-    caption_start_rlbased = r"""Average running time (\aRT\ in $\log_{10}$ of number of function evaluations)
+    caption_start_rlbased = r"""Expected running time (\ERT\ in $\log_{10}$ of number of function evaluations)
                     of \algorithmA\ ($y$-axis) versus \algorithmB\ ($x$-axis) for $!!NBTARGETS-SCATTER!!$ runlength-based target
                     values for budgets between $!!NBLOW!!$ and $!!NBUP!!$ evaluations.
-                    Each runlength-based target $!!F!!$-value is chosen such that the \aRT{}s of
+                    Each runlength-based target $!!F!!$-value is chosen such that the \ERT{}s of
                     !!THE-REF-ALG!! for the given and a slightly easier
                     target bracket the reference budget.
""" @@ -177,14 +177,14 @@ def main(dsList0, dsList1, outputdir, settings): entry1 = dictDim1[d][0] # should be only one element except (IndexError, KeyError): continue - if linewidth: # plot all reliable aRT values as a line + if linewidth: # plot all reliable ERT values as a line all_targets = np.array(sorted(set(entry0.target).union(entry1.target), reverse=True)) assert entry0.detSuccessRates([all_targets[0]]) == 1.0 assert entry1.detSuccessRates([all_targets[0]]) == 1.0 all_targets = all_targets[np.where(all_targets <= targets((f, d))[0])[0]] # xdata_all = np.array(entry0.detERT(all_targets)) ydata_all = np.array(entry1.detERT(all_targets)) - # idx of reliable targets: last index where success rate >= 1/2 and aRT <= maxevals + # idx of reliable targets: last index where success rate >= 1/2 and ERT <= maxevals idx = [] for ari in (np.where(entry0.detSuccessRates(all_targets) >= 0.5)[0], np.where(entry1.detSuccessRates(all_targets) >= 0.5)[0], diff --git a/code-postprocessing/cocopp/compall/ppfigs.py b/code-postprocessing/cocopp/compall/ppfigs.py index 74151b660..57867004b 100644 --- a/code-postprocessing/cocopp/compall/ppfigs.py +++ b/code-postprocessing/cocopp/compall/ppfigs.py @@ -1,6 +1,6 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -"""Creates aRTs and convergence figures for multiple algorithms.""" +"""Creates ERTs and convergence figures for multiple algorithms.""" from __future__ import absolute_import, print_function import os import matplotlib.pyplot as plt @@ -49,15 +49,15 @@ def fix_styles(plotting_styles, line_styles): def prepare_scaling_figure_caption(): - scaling_figure_caption_start_fixed = (r"""Average running time (\aRT\ in number of $f$-evaluations + scaling_figure_caption_start_fixed = (r"""Expected running time (\ERT\ in number of $f$-evaluations as $\log_{10}$ value), divided by dimension for target function value $!!PPFIGS-FTARGET!!$ versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. """ ) - scaling_figure_caption_start_rlbased = (r"""Average running time (\aRT\ in number of $f$-evaluations + scaling_figure_caption_start_rlbased = (r"""Expected running time (\ERT\ in number of $f$-evaluations as $\log_{10}$ value) divided by dimension versus dimension. The target function value is chosen such that !!THE-REF-ALG!! just failed to achieve - an \aRT\ of $!!PPFIGS-FTARGET!!\times\DIM$. """ + an \ERT\ of $!!PPFIGS-FTARGET!!\times\DIM$. """ ) scaling_figure_caption_end = ( @@ -434,7 +434,7 @@ def generateData(dataSet, target): def main(dictAlg, html_file_prefix, sorted_algorithms=None, output_dir='ppdata', latex_commands_file=''): - """From a DataSetList, returns figures showing the scaling: aRT/dim vs dim. + """From a DataSetList, returns figures showing the scaling: ERT/dim vs dim. One function and one target per figure. diff --git a/code-postprocessing/cocopp/compall/ppperfprof.py b/code-postprocessing/cocopp/compall/ppperfprof.py index 74383399b..f1eda15ae 100755 --- a/code-postprocessing/cocopp/compall/ppperfprof.py +++ b/code-postprocessing/cocopp/compall/ppperfprof.py @@ -82,7 +82,7 @@ def beautify(): a.set_xscale('log') #Tick label handling - plt.xlabel('log10 of (aRT / aRTref)') + plt.xlabel('log10 of (ERT / ERTref)') plt.ylabel('Proportion of functions') logxticks() beautifyECDF() @@ -123,9 +123,9 @@ def plot(dsList, dsref, targets=defaulttargets, istoolsstats=False, **kwargs): """Generates a graph showing the performance profile of an algorithm. 
We display the empirical cumulative distribution function ECDF of - the bootstrapped distribution of the average running time (aRT) + the bootstrapped distribution of the expected running time (ERT) for an algorithm to reach the function value :py:data:`targets` - normalized by the aRT of the reference algorithm for these + normalized by the ERT of the reference algorithm for these targets. :param DataSetList dsList: data set for one algorithm @@ -142,7 +142,7 @@ def plot(dsList, dsref, targets=defaulttargets, istoolsstats=False, **kwargs): for entry in dsList: for t in targets: # TODO: alternative: min(dsref[(entry.dim, entry.funcId)].detEvals((t,))[0]) - # is the min from the alg with the best aRT + # is the min from the alg with the best ERT flg_ert = 1 if flg_ert: normalizer = dsref[(entry.dim, entry.funcId)].detERT((t,))[0] diff --git a/code-postprocessing/cocopp/compall/pprldmany.py b/code-postprocessing/cocopp/compall/pprldmany.py index 25f47681a..e07dca0f3 100644 --- a/code-postprocessing/cocopp/compall/pprldmany.py +++ b/code-postprocessing/cocopp/compall/pprldmany.py @@ -1,11 +1,11 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -"""Generates figure of the bootstrap distribution of aRT. +"""Generates figure of the bootstrap distribution of ERT. The main method in this module generates figures of Empirical Cumulative Distribution Functions of the bootstrap distribution of -the Average Running Time (aRT) divided by the dimension for many +the Expected Running Time (ERT) divided by the dimension for many algorithms. The outputs show the ECDFs of the running times of the simulated runs @@ -20,7 +20,7 @@ import cocopp - # Empirical cumulative distribution function of bootstrapped aRT figure + # Empirical cumulative distribution function of bootstrapped ERT figure ds = cocopp.load(cocopp.bbob.get('2009/BIPOP-CMA-ES')) figure() cocopp.compall.pprldmany.plot(ds) # must rather call main instead of plot? @@ -555,7 +555,7 @@ def main(dictAlg, order=None, outputdir='.', info='default', From a dictionary of :py:class:`DataSetList` sorted by algorithms, generates the cumulative distribution function of the bootstrap - distribution of aRT for algorithms on multiple functions for + distribution of ERT for algorithms on multiple functions for multiple targets altogether. :param dict dictAlg: dictionary of :py:class:`DataSetList` instances diff --git a/code-postprocessing/cocopp/compall/pptables.py b/code-postprocessing/cocopp/compall/pptables.py index c85f638f3..9f61df254 100644 --- a/code-postprocessing/cocopp/compall/pptables.py +++ b/code-postprocessing/cocopp/compall/pptables.py @@ -23,15 +23,15 @@ def get_table_caption(): """ table_caption_one = r"""% - Average runtime (\aRT\ in number of function - evaluations) divided by the respective !!BEST-ART!! in + Expected runtime (\ERT\ in number of function + evaluations) divided by the respective !!BEST-ERT!! in #1. - This \aRT\ ratio and, in braces as dispersion measure, the half difference between + This \ERT\ ratio and, in braces as dispersion measure, the half difference between 10 and 90\%-tile of bootstrapped run lengths appear for each algorithm and """ table_caption_one_noreference = r"""% - Average runtime (\aRT) to reach given targets, measured - in number of function evaluations, in #1. For each function, the \aRT\ + Expected runtime (\ERT) to reach given targets, measured + in number of function evaluations, in #1. 
For each function, the \ERT\ and, in braces as dispersion measure, the half difference between 10 and 90\%-tile of (bootstrapped) runtimes is shown for the different target !!DF!!-values as shown in the top row. @@ -39,13 +39,13 @@ def get_table_caption(): $!!FOPT!! + """ + testbedsettings.current_testbed.hardesttargetlatex + r"""$. """ table_caption_two1 = r"""% - target, the corresponding reference \aRT\ + target, the corresponding reference \ERT\ in the first row. The different target !!DF!!-values are shown in the top row. \#succ is the number of trials that reached the (final) target $!!FOPT!! + """ + testbedsettings.current_testbed.hardesttargetlatex + r"""$. """ table_caption_two2 = r"""% - run-length based target, the corresponding reference \aRT\ + run-length based target, the corresponding reference \ERT\ (preceded by the target !!DF!!-value in \textit{italics}) in the first row. \#succ is the number of trials that reached the target value of the last column. """ @@ -461,7 +461,7 @@ def main(dict_alg, sorted_algs, output_dir='.', function_targets_line=True, late extraeol.append(r'\hline') # extraeol.append(r'\hline\arrayrulecolor{tableShade}') - # line with function name and potential aRT values of reference algorithm + # line with function name and potential ERT values of reference algorithm curline = [r'\textbf{f%d}' % df[1]] replaceValue = 'f%d, %d-D' % (df[1], df[0]) curlineHtml = [item.replace('REPLACEH', replaceValue) for item in curlineHtml] @@ -572,7 +572,7 @@ def main(dict_alg, sorted_algs, output_dir='.', function_targets_line=True, late tmpevals = numpy.array(sorted(tmpevals))[0:min(len(tmpevals), len(bestevals))] bestevals = numpy.array(sorted(bestevals))[0:min(len(tmpevals), len(bestevals))] - # The conditions are now that aRT < aRT_best and + # The conditions are now that ERT < ERT_best and # all(sorted(FEvals_best) > sorted(FEvals_current)). if numpy.isinf(refalgert[j]) or all(tmpevals < bestevals): nbstars = -numpy.ceil(numpy.log10(nbtests * p)) diff --git a/code-postprocessing/cocopp/firstsession.py b/code-postprocessing/cocopp/firstsession.py index cd4b961cc..f0d6c7d12 100644 --- a/code-postprocessing/cocopp/firstsession.py +++ b/code-postprocessing/cocopp/firstsession.py @@ -143,7 +143,7 @@ pprldistr.plot(ds) pprldistr.beautify() # resize the window to view whole figure -# Empirical cumulative distribution function of bootstrapped aRT figure +# Empirical cumulative distribution function of bootstrapped ERT figure from .compall import pprldmany ds = cocopp.load(glob.glob('BBOB2009pythondata/BIPOP-CMA-ES/ppdata_f0*_20.pickle')) figure() diff --git a/code-postprocessing/cocopp/latex_commands_for_html.html b/code-postprocessing/cocopp/latex_commands_for_html.html index 15c358c2a..ef62f0f28 100644 --- a/code-postprocessing/cocopp/latex_commands_for_html.html +++ b/code-postprocessing/cocopp/latex_commands_for_html.html @@ -34,10 +34,10 @@
##bbobppfigslegendrlbased## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value) divided by dimension versus dimension. The target function value is chosen such that !!THE-REF-ALG!! just failed to achieve - an aRT of !!PPFIGS−FTARGET!!×DIM. Different symbols correspond to different algorithms given in the legend of f1 and f24. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). + an ERT of !!PPFIGS−FTARGET!!×DIM. Different symbols correspond to different algorithms given in the legend of f1 and f24. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six).
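The stars in this legend rest on pairwise statistical tests whose p-values are Bonferroni-corrected by the number of comparisons. As a minimal sketch of such a decision rule (an illustration only, not the cocopp implementation; significantly_better, runtimes_a and runtimes_b are hypothetical names, with runtimes given as per-trial evaluation counts):

    import numpy as np
    from scipy import stats

    def significantly_better(runtimes_a, runtimes_b, n_tests=6, alpha=0.01):
        """True if runtimes_a is significantly lower (better) than runtimes_b
        under a rank-sum test, with the raw p-value Bonferroni-corrected by
        multiplying with n_tests (here: the six dimensions)."""
        _, p = stats.ranksums(runtimes_a, runtimes_b)
        return p * n_tests < alpha and np.median(runtimes_a) < np.median(runtimes_b)

Multiplying the p-value by the number of tests is equivalent to dividing the significance level by it, which is all the Bonferroni correction does.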
##bbobpprldistrlegendrlbased## @@ -80,15 +80,15 @@ ##bbobppfigdimlegendrlbased## Scaling of runtime with dimension to reach certain target values ∆f. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for targets just not reached by the best algorithm from BBOB 2009 + plotted as log10 values versus dimension. Shown is the ERT for targets just not reached by the best algorithm from BBOB 2009 within the given budget k×DIM, where k is shown in the - legend. Numbers above aRT-symbols (if appearing) indicate the number + legend. Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. The light thick line with diamonds indicates the best algorithm from BBOB 2009 for the most difficult target. Slanted grid lines indicate a scaling with O(DIM) compared to O(1) when using the respective reference algorithm. @@ -96,10 +96,10 @@
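The "simulated runs" in these legends are COCO's simulated restarts: an unsuccessful trial contributes its full budget, after which another randomly drawn trial continues the run, until a successful trial is drawn. A minimal sketch under that definition (hypothetical helper, not the cocopp code; NaN in evals marks a trial that missed the target within max_evals[i] evaluations):

    import numpy as np

    def simulated_restart_runtime(evals, max_evals, rng):
        """One bootstrapped run length; assumes at least one successful trial."""
        total = 0
        while True:
            i = rng.integers(len(evals))
            if np.isnan(evals[i]):
                total += max_evals[i]  # failed trial: pay its budget and restart
            else:
                return total + evals[i]  # success ends the simulated run

    rng = np.random.default_rng(1)
    evals = np.array([120.0, np.nan, 80.0])
    max_evals = np.array([200.0, 200.0, 200.0])
    samples = [simulated_restart_runtime(evals, max_evals, rng) for _ in range(1000)]

The empirical distribution of samples is what the ECDF figures and the notched boxes summarize.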
##bbobpptablecaptionrlbased## - Average running time (aRT in number of function - evaluations) divided by the aRT of the best algorithm from BBOB 2009 in different dimensions. This aRT  ratio and, in braces as dispersion measure, the half difference between 90 and + Expected running time (ERT in number of function + evaluations) divided by the ERT of the best algorithm from BBOB 2009 in different dimensions. This ERT  ratio and, in braces as dispersion measure, the half difference between 90 and 10%-tile of bootstrapped run lengths appear in the second row of each cell, - the best aRT  in the first. The different target ∆f-values are shown in the top row. + the best ERT  in the first. The different target ∆f-values are shown in the top row. #succ is the number of trials that reached the (final) target fopt + 10−8. The median number of conducted function evaluations is additionally given in @@ -113,12 +113,12 @@
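All of these tables lean on two quantities. The ERT is the total number of function evaluations spent in all trials, counting the full budget of unsuccessful trials, divided by the number of trials that reached the target; the number in braces is half the spread between the 10 and 90%-tiles of bootstrapped run lengths. A minimal sketch of both (hypothetical helpers, not the cocopp implementation):

    import numpy as np

    def ert(evals, max_evals):
        """evals[i]: evaluations until the target was reached in trial i,
        or NaN if the trial failed after max_evals[i] evaluations."""
        success = ~np.isnan(evals)
        if not success.any():
            return np.inf
        return (evals[success].sum() + max_evals[~success].sum()) / success.sum()

    def dispersion(bootstrapped_run_lengths):
        """Half the difference between the 90%-tile and the 10%-tile."""
        q10, q90 = np.percentile(bootstrapped_run_lengths, [10, 90])
        return (q90 - q10) / 2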
##bbobpptablesmanylegendrlbased## - Average runtime (aRT in number of function - evaluations) divided by the respective best aRT measured during BBOB-2009 in + Expected runtime (ERT in number of function + evaluations) divided by the respective best ERT measured during BBOB-2009 in different dimensions. - This aRT ratio and, in braces as dispersion measure, the half difference between + This ERT ratio and, in braces as dispersion measure, the half difference between 10 and 90%-tile of bootstrapped run lengths appear for each algorithm and - run-length based target, the corresponding reference aRT  (preceded by the target ∆f-value in italics) in the first row. + run-length based target, the corresponding reference ERT  (preceded by the target ∆f-value in italics) in the first row. #succ is the number of trials that reached the target value of the last column. The median number of conducted function evaluations is additionally given in italics, if the target in the last column was never reached. @@ -131,10 +131,10 @@
##bbobppscatterlegendrlbased## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! runlength-based target values for budgets between !!NBLOW!! and !!NBUP!! evaluations. - Each runlength-based target !!F!!-value is chosen such that the aRTs of + Each runlength-based target !!F!!-value is chosen such that the ERTs of !!THE-REF-ALG!! for the given and a slightly easier target bracket the reference budget. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -148,17 +148,17 @@
##bbobloglosstablecaptionrlbased## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target f-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if the best algorithm from BBOB 2009 reached a better target within the budget, - divided by the aRT of the best algorithm from BBOB 2009 to reach ft. + divided by the ERT of the best algorithm from BBOB 2009 to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -166,7 +166,7 @@
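Spelled out, the construction in this caption reduces to a capped ratio and a geometric mean. A minimal sketch under the caption's definitions (hypothetical names; ert_algo and ert_ref are the ERTs, in f-evaluations, to reach the target ft):

    import numpy as np

    def ert_loss_ratio(budget, ert_algo, ert_ref, ref_reached_better_target):
        """Numerator: the algorithm's ERT to reach ft, or the budget itself if
        the reference algorithm reached an even better target within the
        budget; the ratio is taken against the reference ERT to reach ft."""
        numerator = budget if ref_reached_better_target else ert_algo
        return numerator / ert_ref

    def geometric_mean(ratios):
        """Aggregation drawn as the line in the loss-ratio figures."""
        return float(np.exp(np.mean(np.log(ratios))))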
##bbobloglossfigurecaptionrlbased## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -180,7 +180,7 @@
##bbobppfigslegendfixed## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!! versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. Different symbols correspond to different algorithms given in the legend of f1 and f24. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). @@ -227,26 +227,26 @@ ##bbobppfigdimlegendfixed## Scaling of runtime with dimension to reach certain target values ∆f. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆f + plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆f = 10k with k given in the legend. - Numbers above aRT-symbols (if appearing) indicate the number of trials + Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. The light thick line with diamonds indicates the best algorithm from BBOB 2009 for the most difficult target. Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.
##bbobpptablecaptionfixed## - Average running time (aRT in number of function - evaluations) divided by the aRT of the best algorithm from BBOB 2009 in different dimensions. This aRT  ratio and, in braces as dispersion measure, the half difference between 90 and + Expected running time (ERT in number of function + evaluations) divided by the ERT of the best algorithm from BBOB 2009 in different dimensions. This ERT  ratio and, in braces as dispersion measure, the half difference between 90 and 10%-tile of bootstrapped run lengths appear in the second row of each cell, - the best aRT  (preceded by the target ∆f-value in italics) in the first. + the best ERT  (preceded by the target ∆f-value in italics) in the first. #succ is the number of trials that reached the target value of the last column. The median number of conducted function evaluations is additionally given in italics, if the target in the last column was never reached. @@ -259,12 +259,12 @@
##bbobpptablesmanylegendfixed## - Average runtime (aRT in number of function - evaluations) divided by the respective best aRT measured during BBOB-2009 in + Expected runtime (ERT in number of function + evaluations) divided by the respective best ERT measured during BBOB-2009 in different dimensions. - This aRT ratio and, in braces as dispersion measure, the half difference between + This ERT ratio and, in braces as dispersion measure, the half difference between 10 and 90%-tile of bootstrapped run lengths appear for each algorithm and - target, the corresponding reference aRT  in the first row. The different target ∆f-values are shown in the top row. + target, the corresponding reference ERT  in the first row. The different target ∆f-values are shown in the top row. #succ is the number of trials that reached the (final) target fopt+ 10−8. The median number of conducted function evaluations is additionally given in @@ -278,7 +278,7 @@
##bbobppscatterlegendfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f24. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -292,17 +292,17 @@
##bbobloglosstablecaptionfixed## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target f-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if the best algorithm from BBOB 2009 reached a better target within the budget, - divided by the aRT of the best algorithm from BBOB 2009 to reach ft. + divided by the ERT of the best algorithm from BBOB 2009 to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -310,7 +310,7 @@
##bbobloglossfigurecaptionfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -324,7 +324,7 @@
##bbobppfigslegendbiobjfixed## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!! versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. Different symbols correspond to different algorithms given in the legend of f1 and f55. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). @@ -369,26 +369,26 @@ ##bbobppfigdimlegendbiobjfixed## Scaling of runtime with dimension to reach certain target values ∆I. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆I + plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆I = 10k with k given in the legend. - Numbers above aRT-symbols (if appearing) indicate the number of trials + Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. The light thick line with diamonds indicates the best algorithm from BBOB 2016 for the most difficult target. Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.
##bbobpptablecaptionbiobjfixed## - Average running time (aRT in number of function - evaluations) divided by the aRT of the best algorithm from BBOB 2016 in different dimensions. This aRT  ratio and, in braces as dispersion measure, the half difference between 90 and + Expected running time (ERT in number of function + evaluations) divided by the ERT of the best algorithm from BBOB 2016 in different dimensions. This ERT  ratio and, in braces as dispersion measure, the half difference between 90 and 10%-tile of bootstrapped run lengths appear in the second row of each cell, - the best aRT  (preceded by the target ∆I-value in italics) in the first. + the best ERT  (preceded by the target ∆I-value in italics) in the first. #succ is the number of trials that reached the target value of the last column. The median number of conducted function evaluations is additionally given in italics, if the target in the last column was never reached. @@ -401,12 +401,12 @@
##bbobpptablesmanylegendbiobjfixed## - Average runtime (aRT in number of function - evaluations) divided by the respective best aRT measured during BBOB-2016 in + Expected runtime (ERT in number of function + evaluations) divided by the respective best ERT measured during BBOB-2016 in different dimensions. - This aRT ratio and, in braces as dispersion measure, the half difference between + This ERT ratio and, in braces as dispersion measure, the half difference between 10 and 90%-tile of bootstrapped run lengths appear for each algorithm and - target, the corresponding reference aRT  in the first row. The different target ∆I-values are shown in the top row. + target, the corresponding reference ERT  in the first row. The different target ∆I-values are shown in the top row. #succ is the number of trials that reached the (final) target Iref+ 10−5. The median number of conducted function evaluations is additionally given in @@ -420,7 +420,7 @@
##bbobppscatterlegendbiobjfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f55. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -434,17 +434,17 @@
##bbobloglosstablecaptionbiobjfixed## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target IHVCOCO-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if the best algorithm from BBOB 2016 reached a better target within the budget, - divided by the aRT of the best algorithm from BBOB 2016 to reach ft. + divided by the ERT of the best algorithm from BBOB 2016 to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -452,7 +452,7 @@
##bbobloglossfigurecaptionbiobjfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -466,10 +466,10 @@
##bbobppfigslegendbiobjrlbased## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value) divided by dimension versus dimension. The target function value is chosen such that !!THE-REF-ALG!! just failed to achieve - an aRT of !!PPFIGS−FTARGET!!×DIM. Different symbols correspond to different algorithms given in the legend of f1 and f55. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). + an ERT of !!PPFIGS−FTARGET!!×DIM. Different symbols correspond to different algorithms given in the legend of f1 and f55. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six).
##bbobpprldistrlegendbiobjrlbased## @@ -513,15 +513,15 @@ ##bbobppfigdimlegendbiobjrlbased## Scaling of runtime with dimension to reach certain target values ∆I. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for targets just not reached by the best algorithm from BBOB 2016 + plotted as log10 values versus dimension. Shown is the ERT for targets just not reached by the best algorithm from BBOB 2016 within the given budget k×DIM, where k is shown in the - legend. Numbers above aRT-symbols (if appearing) indicate the number + legend. Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. The light thick line with diamonds indicates the best algorithm from BBOB 2016 for the most difficult target. Slanted grid lines indicate a scaling with O(DIM) compared to O(1) when using the respective reference algorithm. @@ -529,10 +529,10 @@
##bbobpptablecaptionbiobjrlbased## - Average running time (aRT in number of function - evaluations) divided by the aRT of the best algorithm from BBOB 2016 in different dimensions. This aRT  ratio and, in braces as dispersion measure, the half difference between 90 and + Expected running time (ERT in number of function + evaluations) divided by the ERT of the best algorithm from BBOB 2016 in different dimensions. This ERT  ratio and, in braces as dispersion measure, the half difference between 90 and 10%-tile of bootstrapped run lengths appear in the second row of each cell, - the best aRT  in the first. The different target ∆I-values are shown in the top row. + the best ERT  in the first. The different target ∆I-values are shown in the top row. #succ is the number of trials that reached the (final) target Iref + 10−5. The median number of conducted function evaluations is additionally given in @@ -546,12 +546,12 @@
##bbobpptablesmanylegendbiobjrlbased## - Average runtime (aRT in number of function - evaluations) divided by the respective best aRT measured during BBOB-2016 in + Expected runtime (ERT in number of function + evaluations) divided by the respective best ERT measured during BBOB-2016 in different dimensions. - This aRT ratio and, in braces as dispersion measure, the half difference between + This ERT ratio and, in braces as dispersion measure, the half difference between 10 and 90%-tile of bootstrapped run lengths appear for each algorithm and - run-length based target, the corresponding reference aRT  (preceded by the target ∆I-value in italics) in the first row. + run-length based target, the corresponding reference ERT  (preceded by the target ∆I-value in italics) in the first row. #succ is the number of trials that reached the target value of the last column. The median number of conducted function evaluations is additionally given in italics, if the target in the last column was never reached. @@ -564,10 +564,10 @@
##bbobppscatterlegendbiobjrlbased## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! runlength-based target values for budgets between !!NBLOW!! and !!NBUP!! evaluations. - Each runlength-based target !!F!!-value is chosen such that the aRTs of + Each runlength-based target !!F!!-value is chosen such that the ERTs of !!THE-REF-ALG!! for the given and a slightly easier target bracket the reference budget. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -581,17 +581,17 @@
##bbobloglosstablecaptionbiobjrlbased## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target IHVCOCO-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if the best algorithm from BBOB 2016 reached a better target within the budget, - divided by the aRT of the best algorithm from BBOB 2016 to reach ft. + divided by the ERT of the best algorithm from BBOB 2016 to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -599,7 +599,7 @@
##bbobloglossfigurecaptionbiobjrlbased## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -613,7 +613,7 @@
##bbobppfigslegendbiobjextfixed## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!! versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. Different symbols correspond to different algorithms given in the legend of f1 and f92. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). @@ -658,24 +658,24 @@ ##bbobppfigdimlegendbiobjextfixed## Scaling of runtime with dimension to reach certain target values ∆I. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆I + plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆I = 10k with k given in the legend. - Numbers above aRT-symbols (if appearing) indicate the number of trials + Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.
##bbobpptablecaptionbiobjextfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆I-values as shown in the top row. @@ -687,8 +687,8 @@
##bbobpptablesmanylegendbiobjextfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations, in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations, in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆I-values as shown in the top row. @@ -705,7 +705,7 @@
##bbobppscatterlegendbiobjextfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f92. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -719,17 +719,17 @@
##bbobloglosstablecaptionbiobjextfixed## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target IHVCOCO-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if reached a better target within the budget, - divided by the aRT of to reach ft. + divided by the ERT of to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -737,7 +737,7 @@
##bbobloglossfigurecaptionbiobjextfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -751,7 +751,7 @@
##bbobppfigslegendconstrainedfixed## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!! versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. Different symbols correspond to different algorithms given in the legend of f1 and f48. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). @@ -796,24 +796,24 @@ ##bbobppfigdimlegendconstrainedfixed## Scaling of runtime with dimension to reach certain target values ∆f. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆f + plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆f = 10k with k given in the legend. - Numbers above aRT-symbols (if appearing) indicate the number of trials + Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.
##bbobpptablecaptionconstrainedfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -825,8 +825,8 @@
##bbobpptablesmanylegendconstrainedfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations, in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations, in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -843,7 +843,7 @@
##bbobppscatterlegendconstrainedfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f48. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -857,17 +857,17 @@
##bbobloglosstablecaptionconstrainedfixed## - aRT loss ratio versus the budget in number of f-evaluations + ERT loss ratio versus the budget in number of f-evaluations divided by dimension. For each given budget FEvals, the target value ft is computed as the best target f-value reached within the budget by the given algorithm. - Shown is then the aRT to reach ft for the given algorithm + Shown is then the ERT to reach ft for the given algorithm or the budget, if reached a better target within the budget, - divided by the aRT of to reach ft. + divided by the ERT of to reach ft. Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median - (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio + (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also the following figure for results on each function subgroup.??COCOVERSION?? @@ -875,7 +875,7 @@
##bbobloglossfigurecaptionconstrainedfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -889,7 +889,7 @@
##bbobppfigslegendlargescalefixed## -Average running time (aRT in number of f-evaluations +Expected running time (ERT in number of f-evaluations as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!! versus dimension. Slanted grid lines indicate quadratic scaling with the dimension. Different symbols correspond to different algorithms given in the legend of f1 and f24. Light symbols give the maximum number of function evaluations from the longest trial divided by dimension. Black stars indicate a statistically better result compared to all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six). @@ -931,24 +931,24 @@ ##bbobppfigdimlegendlargescalefixed## Scaling of runtime with dimension to reach certain target values ∆f. - Lines: average runtime (aRT); + Lines: expected runtime (ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross (×): maximum number of f-evaluations in any trial. Notched boxes: interquartile range with median of simulated runs; All values are divided by dimension and - plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆f + plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆f = 10k with k given in the legend. - Numbers above aRT-symbols (if appearing) indicate the number of trials + Numbers above ERT-symbols (if appearing) indicate the number of trials reaching the respective target. Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.
##bbobpptablecaptionlargescalefixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -960,8 +960,8 @@
##bbobpptablesmanylegendlargescalefixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations, in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations, in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -978,7 +978,7 @@
##bbobppscatterlegendlargescalefixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f24. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -992,17 +992,17 @@
##bbobloglosstablecaptionlargescalefixed##
- aRT loss ratio versus the budget in number of f-evaluations
+ ERT loss ratio versus the budget in number of f-evaluations
 divided by dimension. For each given budget FEvals,
 the target value ft is computed as the best target f-value reached within the
 budget by the given algorithm.
- Shown is then the aRT to reach ft for the given algorithm
+ Shown is then the ERT to reach ft for the given algorithm
 or the budget, if reached a better target within the budget,
- divided by the aRT of to reach ft.
+ divided by the ERT of the reference algorithm to reach ft.
 Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median
- (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio
+ (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio
 (points). The vertical line gives the maximal number of function
 evaluations in a single trial in this function subset. See also
 the following figure for results on each function subgroup.??COCOVERSION??
@@ -1010,7 +1010,7 @@
##bbobloglossfigurecaptionlargescalefixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -1024,7 +1024,7 @@
##bbobppfigslegendmixintfixed##
-Average running time (aRT in number of f-evaluations
+Expected running time (ERT in number of f-evaluations
 as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!!
 versus dimension. Slanted grid lines indicate quadratic scaling with the dimension.
 Different symbols correspond to different algorithms given in the legend of f1 and f24.
 Light symbols give the maximum number of function evaluations from the longest trial
 divided by dimension. Black stars indicate a statistically better result compared to
 all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six).
@@ -1066,24 +1066,24 @@
##bbobppfigdimlegendmixintfixed##
 Scaling of runtime with dimension to reach certain target values ∆f.
- Lines: average runtime (aRT);
+ Lines: expected runtime (ERT);
 Cross (+): median runtime of successful runs to reach the most difficult
 target that was reached at least once (but not always);
 Cross (×): maximum number of f-evaluations in any trial. Notched boxes:
 interquartile range with median of simulated runs;
 All values are divided by dimension and
- plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆f
+ plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆f
 = 10<sup>k</sup> with k given in the legend.
- Numbers above aRT-symbols (if appearing) indicate the number of trials
+ Numbers above ERT-symbols (if appearing) indicate the number of trials
 reaching the respective target. Horizontal lines mean linear scaling, slanted
 grid lines depict quadratic scaling.
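The test behind the black stars in the first legend above can be approximated as follows. This is a sketch only: a rank-sum test with Bonferroni correction over the six dimensions, on made-up per-trial runtimes; cocopp's actual significance procedure in toolsstats is more involved.

    import numpy as np
    from scipy.stats import ranksums

    rng = np.random.default_rng(2)
    runtimes_A = rng.lognormal(7.0, 0.3, size=15)  # trials of algorithm A
    runtimes_B = rng.lognormal(7.5, 0.3, size=15)  # trials of algorithm B

    n_dims = 6  # Bonferroni correction: the number of dimensions
    _, p = ranksums(runtimes_A, runtimes_B)
    better = np.median(runtimes_A) < np.median(runtimes_B)
    print(better and p * n_dims < 0.01)  # significantly better after correction?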
##bbobpptablecaptionmixintfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -1095,8 +1095,8 @@
##bbobpptablesmanylegendmixintfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations, in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations, in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -1113,7 +1113,7 @@
##bbobppscatterlegendmixintfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f24. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -1127,17 +1127,17 @@
##bbobloglosstablecaptionmixintfixed##
- aRT loss ratio versus the budget in number of f-evaluations
+ ERT loss ratio versus the budget in number of f-evaluations
 divided by dimension. For each given budget FEvals,
 the target value ft is computed as the best target f-value reached within the
 budget by the given algorithm.
- Shown is then the aRT to reach ft for the given algorithm
+ Shown is then the ERT to reach ft for the given algorithm
 or the budget, if reached a better target within the budget,
- divided by the aRT of to reach ft.
+ divided by the ERT of the reference algorithm to reach ft.
 Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median
- (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio
+ (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio
 (points). The vertical line gives the maximal number of function
 evaluations in a single trial in this function subset. See also
 the following figure for results on each function subgroup.??COCOVERSION??
@@ -1145,7 +1145,7 @@
##bbobloglossfigurecaptionmixintfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -1159,7 +1159,7 @@
##bbobppfigslegendbiobjmixintfixed##
-Average running time (aRT in number of f-evaluations
+Expected running time (ERT in number of f-evaluations
 as log10 value), divided by dimension for target function value !!PPFIGS−FTARGET!!
 versus dimension. Slanted grid lines indicate quadratic scaling with the dimension.
 Different symbols correspond to different algorithms given in the legend of f1 and f24.
 Light symbols give the maximum number of function evaluations from the longest trial
 divided by dimension. Black stars indicate a statistically better result compared to
 all other algorithms with p < 0.01 and Bonferroni correction number of dimensions (six).
@@ -1201,24 +1201,24 @@
##bbobppfigdimlegendbiobjmixintfixed##
 Scaling of runtime with dimension to reach certain target values ∆f.
- Lines: average runtime (aRT);
+ Lines: expected runtime (ERT);
 Cross (+): median runtime of successful runs to reach the most difficult
 target that was reached at least once (but not always);
 Cross (×): maximum number of f-evaluations in any trial. Notched boxes:
 interquartile range with median of simulated runs;
 All values are divided by dimension and
- plotted as log10 values versus dimension. Shown is the aRT for fixed values of ∆f
+ plotted as log10 values versus dimension. Shown is the ERT for fixed values of ∆f
 = 10<sup>k</sup> with k given in the legend.
- Numbers above aRT-symbols (if appearing) indicate the number of trials
+ Numbers above ERT-symbols (if appearing) indicate the number of trials
 reaching the respective target. Horizontal lines mean linear scaling, slanted
 grid lines depict quadratic scaling.
##bbobpptablecaptionbiobjmixintfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -1230,8 +1230,8 @@
##bbobpptablesmanylegendbiobjmixintfixed## - Average runtime (aRT) to reach given targets, measured - in number of function evaluations, in different dimensions. For each function, the aRT  + Expected runtime (ERT) to reach given targets, measured + in number of function evaluations, in different dimensions. For each function, the ERT  and, in braces as dispersion measure, the half difference between 10 and 90%-tile of (bootstrapped) runtimes is shown for the different target ∆f-values as shown in the top row. @@ -1248,7 +1248,7 @@
##bbobppscatterlegendbiobjmixintfixed## -Average running time (aRT in log10 of number of function evaluations) +Expected running time (ERT in log10 of number of function evaluations) of algorithmA (y-axis) versus algorithmB (x-axis) for !!NBTARGETS−SCATTER!! target values !!DF!! ∈ [!!NBLOW!!, !!NBUP!!] in each dimension on functions f1 - f24. Markers on the upper or right edge indicate that the respective target value was never reached. Markers represent dimension: @@ -1262,17 +1262,17 @@
##bbobloglosstablecaptionbiobjmixintfixed##
- aRT loss ratio versus the budget in number of f-evaluations
+ ERT loss ratio versus the budget in number of f-evaluations
 divided by dimension. For each given budget FEvals,
 the target value ft is computed as the best target f-value reached within the
 budget by the given algorithm.
- Shown is then the aRT to reach ft for the given algorithm
+ Shown is then the ERT to reach ft for the given algorithm
 or the budget, if reached a better target within the budget,
- divided by the aRT of to reach ft.
+ divided by the ERT of the reference algorithm to reach ft.
 Line: geometric mean. Box-Whisker error bar: 25-75%-ile with median
- (box), 10-90%-ile (caps), and minimum and maximum aRT loss ratio
+ (box), 10-90%-ile (caps), and minimum and maximum ERT loss ratio
 (points). The vertical line gives the maximal number of function
 evaluations in a single trial in this function subset. See also
 the following figure for results on each function subgroup.??COCOVERSION??
@@ -1280,7 +1280,7 @@
##bbobloglossfigurecaptionbiobjmixintfixed## - aRT loss ratios (see the previous figure for details). + ERT loss ratios (see the previous figure for details).
Each cross (+) represents a single function, the line @@ -1293,5 +1293,5 @@ TEX by TTH, -version 4.08.
On 15 Feb 2019, 17:38. +version 4.08.
On 30 Jan 2020, 13:26. diff --git a/code-postprocessing/cocopp/ppfig.py b/code-postprocessing/cocopp/ppfig.py index 6b0928eed..baf93f421 100644 --- a/code-postprocessing/cocopp/ppfig.py +++ b/code-postprocessing/cocopp/ppfig.py @@ -349,7 +349,7 @@ def save_single_functions_html(filename, f.write(caption_string_format % '\n##bbobECDFslegend##') elif htmlPage is HtmlPage.PPTABLE: - current_header = 'aRT in number of function evaluations' + current_header = 'ERT in number of function evaluations' f.write("
<H2> %s </H2>\n" % current_header)
         for index, dimension in enumerate(dimensions):
             f.write(write_dimension_links(dimension, dimensions, index))
@@ -403,7 +403,7 @@ def save_single_functions_html(filename,
     elif htmlPage is HtmlPage.PPLOGLOSS:
         dimensions = testbedsettings.current_testbed.rldDimsOfInterest
         if testbedsettings.current_testbed.reference_algorithm_filename:
-            current_header = 'aRT loss ratios'
+            current_header = 'ERT loss ratios'
             f.write("<H2> %s </H2>\n" % current_header)
             dimension_list = '-D, '.join(str(x) for x in dimensions) + '-D'
@@ -457,9 +457,9 @@ def write_dimension_links(dimension, dimensions, index):
 def write_tables(f, caption_string_format, best_alg_exists, html_key,
                  legend_key, dimensions):
-    current_header = 'Table showing the aRT in number of function evaluations'
+    current_header = 'Table showing the ERT in number of function evaluations'
     if best_alg_exists:
-        current_header += ' divided by the best aRT measured during BBOB-2009'
+        current_header += ' divided by the best ERT measured during BBOB-2009'
     f.write("\n<H2> %s </H2>
\n" % current_header) for index, dimension in enumerate(dimensions): @@ -670,7 +670,7 @@ def beautify(): for i in tmp: tmp2.append('%d' % round(np.log10(i))) axisHandle.set_yticklabels(tmp2) - axisHandle.set_ylabel('log10 of aRT') + axisHandle.set_ylabel('log10 of ERT') def generateData(dataSet, targetFuncValue): @@ -701,7 +701,7 @@ def generateData(dataSet, targetFuncValue): else: med = np.nan - # prepare to compute runlengths / aRT with restarts (AKA SP1) + # prepare to compute runlengths / ERT with restarts (AKA SP1) data[np.isnan(data)] = dataSet.maxevals[np.isnan(data)] res = [] @@ -763,7 +763,7 @@ class StrippedUpDS(): if succ: tmp = np.vstack(succ) - # aRT + # ERT res.extend(plt.plot(tmp[:, 0], tmp[:, 1], **kwargs)) # median tmp2 = plt.plot(tmp[:, 0], tmp[:, -1], **kwargs) diff --git a/code-postprocessing/cocopp/ppfigdim.py b/code-postprocessing/cocopp/ppfigdim.py index 4fe616288..77c8416ff 100644 --- a/code-postprocessing/cocopp/ppfigdim.py +++ b/code-postprocessing/cocopp/ppfigdim.py @@ -3,10 +3,10 @@ """Generate performance scaling figures. -The figures show the scaling of the performance in terms of aRT w.r.t. +The figures show the scaling of the performance in terms of ERT w.r.t. dimensionality on a log-log scale. On the y-axis, data is represented as a number of function evaluations divided by dimension, this is in order -to compare at a glance with a linear scaling for which aRT is +to compare at a glance with a linear scaling for which ERT is proportional to the dimension and would therefore be represented by a horizontal line in the figure. @@ -89,7 +89,7 @@ def scaling_figure_caption(): caption_text = r"""% Scaling of runtime with dimension to reach certain target values !!DF!!. - Lines: average runtime (\aRT); + Lines: expected runtime (\ERT); Cross (+): median runtime of successful runs to reach the most difficult target that was reached at least once (but not always); Cross ({\color{red}$\times$}): maximum number of @@ -99,17 +99,17 @@ def scaling_figure_caption(): """ caption_part_absolute_targets = (r"""% - Shown is the \aRT\ for fixed values of $!!DF!! = 10^k$ with $k$ given + Shown is the \ERT\ for fixed values of $!!DF!! = 10^k$ with $k$ given in the legend. - Numbers above \aRT-symbols (if appearing) indicate the number of trials + Numbers above \ERT-symbols (if appearing) indicate the number of trials reaching the respective target. """ + # TODO: add here "(out of XYZ trials)" r"""!!LIGHT-THICK-LINE!! Horizontal lines mean linear scaling, slanted grid lines depict quadratic scaling.""") caption_part_rlbased_targets = r"""% - Shown is the \aRT\ for targets just not reached by !!THE-REF-ALG!! + Shown is the \ERT\ for targets just not reached by !!THE-REF-ALG!! within the given budget $k\times\DIM$, where $k$ is shown in the - legend. Numbers above \aRT-symbols (if appearing) indicate the number + legend. Numbers above \ERT-symbols (if appearing) indicate the number of trials reaching the respective target. !!LIGHT-THICK-LINE!! Slanted grid lines indicate a scaling with $\mathcal O$$(\DIM)$ compared to $\mathcal O$$(1)$ when using the respective reference algorithm. @@ -316,7 +316,7 @@ def plot_a_bar(x, y, def plot(dsList, valuesOfInterest=None, styles=styles): - """From a DataSetList, plot a figure of aRT/dim vs dim. + """From a DataSetList, plot a figure of ERT/dim vs dim. There will be one set of graphs per function represented in the input data sets. 
Most usually the data sets of different functions @@ -377,7 +377,7 @@ def plot(dsList, valuesOfInterest=None, styles=styles): if len(succ) > 0: tmp = np.vstack(succ) - # aRT + # ERT if genericsettings.scaling_figures_with_boxes: for dim in dimensions: # to find finite simulated runlengths we need to have at least one successful run @@ -511,7 +511,7 @@ def plot_previous_algorithms(func, target=None): # lambda x: [1e-8]): return res def main(dsList, _valuesOfInterest, outputdir): - """From a DataSetList, returns a convergence and aRT/dim figure vs dim. + """From a DataSetList, returns a convergence and ERT/dim figure vs dim. If available, uses data of a reference algorithm as specified in ``:py:genericsettings.py``. diff --git a/code-postprocessing/cocopp/ppfigparam.py b/code-postprocessing/cocopp/ppfigparam.py index 1cf30ac78..4c64112dc 100755 --- a/code-postprocessing/cocopp/ppfigparam.py +++ b/code-postprocessing/cocopp/ppfigparam.py @@ -1,9 +1,9 @@ #! /usr/bin/env python # -*- coding: utf-8 -*- -"""Generate aRT vs param. figures. +"""Generate ERT vs param. figures. -The figures will show the performance in terms of aRT on a log scale +The figures will show the performance in terms of ERT on a log scale w.r.t. parameter. On the y-axis, data is represented as a number of function evaluations. Crosses (+) give the median number of function evaluations for the smallest reached target function value @@ -89,7 +89,7 @@ def beautify(): plt.ylabel('Run Lengths') def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)): - """Generate plot of aRT vs param.""" + """Generate plot of ERT vs param.""" dictparam = dsList.dictByParam(param) params = sorted(dictparam) # sorted because we draw lines @@ -103,7 +103,7 @@ def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)): rawdata[p] = dictparam[p][0].detEvals(targets) # expect dictparam[p] to have only one element - # plot lines for aRT + # plot lines for ERT xpltdata = params for i, t in enumerate(targets): ypltdata = [] @@ -112,7 +112,7 @@ def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)): unsucc = np.isnan(data) assert len(dictparam[p]) == 1 data[unsucc] = dictparam[p][0].maxevals - # compute aRT + # compute ERT ert, srate, succ = toolsstats.sp(data, issuccessful=(unsucc == False)) ypltdata.append(ert) res.extend(plt.plot(xpltdata, ypltdata, markersize=20, @@ -155,7 +155,7 @@ def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)): unsucc = np.isnan(data) assert len(dictparam[p]) == 1 data[unsucc] = dictparam[p][0].maxevals - # compute aRT + # compute ERT ert, srate, succ = toolsstats.sp(data, issuccessful=(unsucc == False)) if srate == 1.: break @@ -168,7 +168,7 @@ def plot(dsList, param='dim', targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8)): def main(dsList, _targets=(10., 1., 1e-1, 1e-2, 1e-3, 1e-5, 1e-8), param=('dim', 'Dimension'), is_normalized=True, outputdir='.'): - """Generates figure of aRT vs. param. + """Generates figure of ERT vs. param. This script will generate as many figures as there are functions. For a given function and a given parameter value there should be diff --git a/code-postprocessing/cocopp/pplogloss.py b/code-postprocessing/cocopp/pplogloss.py index f48849f11..f4d9fce9c 100644 --- a/code-postprocessing/cocopp/pplogloss.py +++ b/code-postprocessing/cocopp/pplogloss.py @@ -1,11 +1,11 @@ #! 
/usr/bin/env python # -*- coding: utf-8 -*- -"""Module for computing aRT loss ratio +"""Module for computing ERT loss ratio -This module outputs figures and tables showing aRT loss ratios. -Comparisons are based on computing the ratio between an aRT value and a -reference (best) aRT value (or the inverse) +This module outputs figures and tables showing ERT loss ratios. +Comparisons are based on computing the ratio between an ERT value and a +reference (best) ERT value (or the inverse) """ @@ -28,14 +28,14 @@ from . import testbedsettings """ -aRT loss ratio of an algorithm A for comparison to a reference/best algorithm. +ERT loss ratio of an algorithm A for comparison to a reference/best algorithm. This works only as comparison to a set of algorithms that reach at least the same target values. Let f=f_A(EVALS) be the smallest target value such that the -average running time of algorithm A was smaller than or equal to EVALS. -Let aRT_A=EVALS, if aRT_ref(next difficult f) < EVALS and -aRT_A=aRT_A(f_A(EVALS)) otherwise (we have aRT_A(f_A(EVALS)) <= EVALS). -The aRT loss ratio for algorithm A is defined as: - Loss_A = stat_fcts(exp(CrE_A) * aRT_A / aRT_ref(f)) +expected running time of algorithm A was smaller than or equal to EVALS. +Let ERT_A=EVALS, if ERT_ref(next difficult f) < EVALS and +ERT_A=ERT_A(f_A(EVALS)) otherwise (we have ERT_A(f_A(EVALS)) <= EVALS). +The ERT loss ratio for algorithm A is defined as: + Loss_A = stat_fcts(exp(CrE_A) * ERT_A / ERT_ref(f)) + where f is a function of EVALS and stat_fcts is the desired statistics over the values from all functions (or a subgroup of functions), for @@ -46,15 +46,15 @@ 10% and 90% (the box covers the line) and a single point for min, max. For a function subgroup the Box-Whisker is replaced with the four or five actual points with the function number written. - Caption: aRT loss ratio: average running time, aRT (measured in number - of function evaluations), divided by the best aRT seen in the reference + Caption: ERT loss ratio: expected running time, ERT (measured in number + of function evaluations), divided by the best ERT seen in the reference algorithm for the respectively same function and target function value, plotted versus number of function evaluations for the functions $f_1$--$f_{24}$ in dimension $D=XXX$, corrected by the parameter-crafting-effort $\exp(CrE)==YYY$. Line: geometric mean over all functions. Box-Whisker error bars: 25-75\%-percentile range with median - (box), 10-90\%-percentile range (line), and minimum and maximum aRT loss - ratio (points). Alternative Box-Whisker sentence: Points: aRT loss ratio + (box), 10-90\%-percentile range (line), and minimum and maximum ERT loss + ratio (points). Alternative Box-Whisker sentence: Points: ERT loss ratio for each function. + The problem: how to find out CrE_A? Possible solution: ask for input in the script and put the given number into the caption and put exp(CrE_A) @@ -62,83 +62,83 @@ + This should make a collection of graphs for all functions and all subgroups which gives an additional page in the 'single algorithm' template. Respective tables could be side-by-side the graphs. - + Example for how to read the graph: a loss ratio of 4 for aRT=20D means, - that the function value reached with aRT=20D could be reached with the - respective reference algorithm in aRT_ref=5D function evaluations on average. 
+ + Example for how to read the graph: a loss ratio of 4 for ERT=20D means, + that the function value reached with ERT=20D could be reached with the + respective reference algorithm in ERT_ref=5D function evaluations on average. Therefore, given a budget of 20*D function evaluations, the reference algorithm could have further improved the function value using the remaining 15*D ($75\%=1-1/4$) function evaluations. -Details: if aRT_A = aRT_A(f_A(EVALS)) always, the x-axis of plots between -different algorithms becomes incomparable. Also could aRT_A < aRT_ref, -even though aRT_ref reaches a better f-value for the given EVALS. +Details: if ERT_A = ERT_A(f_A(EVALS)) always, the x-axis of plots between +different algorithms becomes incomparable. Also could ERT_A < ERT_ref, +even though ERT_ref reaches a better f-value for the given EVALS. """ """OLD STUFF: -aRT loss ratio: average running time, aRT (measured in number - of function evaluations), divided by the reference aRT seen in BBOB-best2009 for +ERT loss ratio: expected running time, ERT (measured in number + of function evaluations), divided by the reference ERT seen in BBOB-best2009 for the respectively same function and target function value, plotted versus number of function evaluations for the functions $f_1$--$f_{24}$ in dimension $D=XXX$, corrected by the parameter-crafting-effort $\exp(CrE)==YYY$. Line: geometric mean over all functions. Box-Whisker error bars: 25-75\%-percentile range with median (box), - 10-90\%-percentile range (line), and minimum and maximum aRT loss ratio + 10-90\%-percentile range (line), and minimum and maximum ERT loss ratio (points). Table: -\aRT\ loss ratio (see also Figure~\ref{fig:aRTgraphs}) vs.\ a given budget +\ERT\ loss ratio (see also Figure~\ref{fig:ERTgraphs}) vs.\ a given budget $\FEvals$. Each cross ({\color{blue}$+$}) represents a single function. The target value \ftarget\ used for a given \FEvals\ is the smallest (best) recorded -function value such that $\aRT(\ftarget)\le\FEvals$ for the presented algorithm. -Shown is \FEvals\ divided by the respective best $\aRT(\ftarget)$ from BBOB-2009 +function value such that $\ERT(\ftarget)\le\FEvals$ for the presented algorithm. +Shown is \FEvals\ divided by the respective best $\ERT(\ftarget)$ from BBOB-2009 for functions $f_1$--$f_{24}$ in 5-D and 20-D. Line: geometric mean. Box-Whisker error bar: 25-75\%-ile with median (box), 10-90\%-ile (caps), and minimum and -maximum \aRT\ loss ratio (points). The vertical line gives the maximal number of +maximum \ERT\ loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. -\aRT\ loss ratio. The aRT of the considered algorithm, the budget, is shown in -the first column. For the loss ratio the budget is divided by the aRT for the -respective best result from BBOB-2009 (see also Table~\ref{tab:aRTloss}). +\ERT\ loss ratio. The ERT of the considered algorithm, the budget, is shown in +the first column. For the loss ratio the budget is divided by the ERT for the +respective best result from BBOB-2009 (see also Table~\ref{tab:ERTloss}). The last row $\text{RL}_{\text{US}}/\text{D}$ gives the number of function evaluations in unsuccessful runs divided by dimension. Shown are the smallest, 10\%-ile, 25\%-ile, 50\%-ile, 75\%-ile and 90\%-ile value (smaller values are -better). The aRT Loss ratio equals to one for the respective best algorithm from +better). The ERT Loss ratio equals to one for the respective best algorithm from BBOB-2009. 
Typical median values are between ten and hundred. -\aRT\ loss ratio. The aRT of the considered algorithm, the budget, is shown in -the first column. For the loss ratio the budget is divided by the aRT for the -respective best result from BBOB-2009 (see also Figure~\ref{fig:aRTlogloss}). +\ERT\ loss ratio. The ERT of the considered algorithm, the budget, is shown in +the first column. For the loss ratio the budget is divided by the ERT for the +respective best result from BBOB-2009 (see also Figure~\ref{fig:ERTlogloss}). The last row $\text{RL}_{\text{US}}/\text{D}$ gives the number of function evaluations in unsuccessful runs divided by dimension. Shown are the smallest, 10\%-ile, 25\%-ile, 50\%-ile, 75\%-ile and 90\%-ile value (smaller values are -better). The aRT Loss ratio equals to one for the respective best algorithm +better). The ERT Loss ratio equals to one for the respective best algorithm from BBOB-2009. Typical median values are between ten and hundred. -such that $\aRT(\ftarget)\le\FEvals$ for the - Shown is \FEvals\ divided by the respective best $\aRT(\ftarget)$ from BBOB-2009 +such that $\ERT(\ftarget)\le\FEvals$ for the + Shown is \FEvals\ divided by the respective best $\ERT(\ftarget)$ from BBOB-2009 % for functions $f_1$--$f_{24}$ in 5-D and 20-D. % - % Each \aRT\ is multiplied by $\exp(\CrE)$ correcting for the parameter crafting effort. + % Each \ERT\ is multiplied by $\exp(\CrE)$ correcting for the parameter crafting effort. """ def table_caption(): table_caption = r"""% - \aRT\ loss ratio versus the budget in number of $f$-evaluations + \ERT\ loss ratio versus the budget in number of $f$-evaluations divided by dimension. For each given budget \FEvals, the target value \ftarget\ is computed as the best target $!!F!!$-value reached within the budget by the given algorithm. - Shown is then the \aRT\ to reach \ftarget\ for the given algorithm + Shown is then the \ERT\ to reach \ftarget\ for the given algorithm or the budget, if !!THE-REF-ALG!! reached a better target within the budget, - divided by the \aRT\ of !!THE-REF-ALG!! to reach \ftarget. + divided by the \ERT\ of !!THE-REF-ALG!! to reach \ftarget. Line: geometric mean. Box-Whisker error bar: 25-75\%-ile with median - (box), 10-90\%-ile (caps), and minimum and maximum \aRT\ loss ratio + (box), 10-90\%-ile (caps), and minimum and maximum \ERT\ loss ratio (points). The vertical line gives the maximal number of function evaluations in a single trial in this function subset. See also - Figure~\ref{fig:aRTlogloss} for results on each function subgroup.\cocoversion + Figure~\ref{fig:ERTlogloss} for results on each function subgroup.\cocoversion """ table_caption = captions.replace(table_caption) @@ -148,7 +148,7 @@ def table_caption(): def figure_caption(): caption = r"""% - \aRT\ loss ratios (see Figure~\ref{tab:aRTloss} for details). + \ERT\ loss ratios (see Figure~\ref{tab:ERTloss} for details). Each cross ({\color{blue}$+$}) represents a single function, the line is the geometric mean. @@ -180,7 +180,7 @@ def detf(entry, evals): """Determines a function value given a number of evaluations. Let A be the algorithm considered. Let f=f_A(evals) be the smallest - target value such that the average running time of algorithm A was + target value such that the expected running time of algorithm A was smaller than or equal to evals. 
:keyword DataSet entry: data set @@ -216,11 +216,11 @@ def generateData(dsList, evals, CrE_A): refalgentry = refalgentries[(D, fun)] - #aRT_A + #ERT_A f_A = detf(entry, evals) - aRT_ref = detERT(refalgentry, f_A) - aRT_A = detERT(entry, f_A) + ERT_ref = detERT(refalgentry, f_A) + ERT_A = detERT(entry, f_A) nextreff = [] for i in f_A: if i == 0.: @@ -232,21 +232,21 @@ def generateData(dsList, evals, CrE_A): except IndexError: nextreff.append(i * 10.**(-0.2)) # TODO: this is a hack - aRT_ref_nextreff = detERT(refalgentry, nextreff) + ERT_ref_nextreff = detERT(refalgentry, nextreff) - for i in range(len(aRT_A)): + for i in range(len(ERT_A)): # nextreff[i] >= f_thresh: this is tested because if it is not true - # aRT_ref_nextreff[i] is supposed to be infinite. - if nextreff[i] >= f_thresh and aRT_ref_nextreff[i] < evals[i]: # is different from the specification... - aRT_A[i] = evals[i] + # ERT_ref_nextreff[i] is supposed to be infinite. + if nextreff[i] >= f_thresh and ERT_ref_nextreff[i] < evals[i]: # is different from the specification... + ERT_A[i] = evals[i] # For test purpose: #if fun % 10 == 0: - # aRT_A[-2] = 1. - # aRT_ref[-2] = np.inf - aRT_A = np.array(aRT_A) - aRT_ref = np.array(aRT_ref) - loss_A = np.exp(CrE_A) * aRT_A / aRT_ref + # ERT_A[-2] = 1. + # ERT_ref[-2] = np.inf + ERT_A = np.array(ERT_A) + ERT_ref = np.array(ERT_ref) + loss_A = np.exp(CrE_A) * ERT_A / ERT_ref assert (np.isnan(loss_A) == False).all() #set_trace() #if np.isnan(loss_A).any() or np.isinf(loss_A).any() or (loss_A == 0.).any(): @@ -421,7 +421,7 @@ def boxplot(x, notch=0, sym='b+', positions=None, widths=None): medians=medians, fliers=fliers) def plot(xdata, ydata): - """Plot the aRT log loss figures. + """Plot the ERT log loss figures. Two cases: box-whisker plot is used for representing the data of all functions, otherwise all data is represented using crosses. @@ -517,12 +517,12 @@ def beautify(): plt.yticks(ydata, yticklabels) plt.xlabel('log10 of FEvals / dimension') - plt.ylabel('log10 of aRT loss ratio') + plt.ylabel('log10 of ERT loss ratio') #a.yaxis.grid(True, which='minor') a.yaxis.grid(True, which='major') def generateTable(dsList, CrE=0., outputdir='.', info='default'): - """Generates aRT loss ratio tables. + """Generates ERT loss ratio tables. :param DataSetList dsList: input data set :param float CrE: crafting effort (see COCO documentation) @@ -562,7 +562,7 @@ def generateTable(dsList, CrE=0., outputdir='.', info='default'): def generateSingleTableTex(dsList, funcs, mFE, d, prcOfInterest, EVALS, data, outputdir='.', info='default'): - """Generates single aRT loss ratio table. + """Generates single ERT loss ratio table. :param DataSetList dsList: input data set :param funcs: @@ -655,11 +655,11 @@ def generateSingleTableTex(dsList, funcs, mFE, d, prcOfInterest, EVALS, data, f.write(res) f.close() if genericsettings.verbose: - print("Wrote aRT loss ratio table in %s." % filename) + print("Wrote ERT loss ratio table in %s." % filename) def generateSingleTableHtml(dsList, funcs, mFE, d, prcOfInterest, EVALS, data, outputdir='.', info='default'): - """Generates single aRT loss ratio table. + """Generates single ERT loss ratio table. :param DataSetList dsList: input data set :param funcs: @@ -780,11 +780,11 @@ def generateSingleTableHtml(dsList, funcs, mFE, d, prcOfInterest, EVALS, data, '
Data produced with COCO %s' % (toolsdivers.get_version_label(None))) if genericsettings.verbose: - print("Wrote aRT loss ratio table in %s." % filename) + print("Wrote ERT loss ratio table in %s." % filename) def generateFigure(dsList, CrE=0., isStoringXRange=True, outputdir='.', info='default'): - """Generates aRT loss ratio figures. + """Generates ERT loss ratio figures. :param DataSetList dsList: input data set :param float CrE: crafting effort (see COCO documentation) @@ -866,7 +866,7 @@ def generateFigure(dsList, CrE=0., isStoringXRange=True, outputdir='.', #plt.rcdefaults() def main(dsList, CrE=0., isStoringXRange=True, outputdir='.', info='default'): - """Generates aRT loss ratio boxplot figures. + """Generates ERT loss ratio boxplot figures. Calls method generateFigure. diff --git a/code-postprocessing/cocopp/pprldistr.py b/code-postprocessing/cocopp/pprldistr.py index 06dbc1a68..43013e656 100644 --- a/code-postprocessing/cocopp/pprldistr.py +++ b/code-postprocessing/cocopp/pprldistr.py @@ -36,7 +36,7 @@ cocopp.pprldistr.plot(ds) cocopp.pprldistr.beautify() # resize the window to view whole figure -CAVEAT: the naming conventions in this module mix up ART (an estimate +CAVEAT: the naming conventions in this module mix up ERT (an estimate of the expected running length) and run lengths. """ @@ -537,7 +537,7 @@ def plotFVDistr(dsList, budget, min_f=None, **plotArgs): vals[vals <= 0] = min(np.append(vals[vals > 0], [min_f])) # works also when vals[vals > 0] is empty if genericsettings.runlength_based_targets: NotImplementedError('related function vals with respective budget ' - + '(e.g. ART(val)) see pplogloss.generateData()') + + '(e.g. ERT(val)) see pplogloss.generateData()') x.extend(vals) nn += ds.nbRuns() diff --git a/code-postprocessing/cocopp/pproc.py b/code-postprocessing/cocopp/pproc.py index 4f0048744..63486bf19 100644 --- a/code-postprocessing/cocopp/pproc.py +++ b/code-postprocessing/cocopp/pproc.py @@ -217,7 +217,7 @@ class RunlengthBasedTargetValues(TargetValues): >>> assert t[-1] == 1.00000000e-08 returns a list of target f-values for F1 in 20-D, based on the - aRT values ``[0.5,...,50]``. + ERT values ``[0.5,...,50]``. Details: The computation starts from the smallest budget and the resulting f-target must always be at least a factor of ``force_different_targets_factor`` smaller @@ -332,7 +332,7 @@ def __len__(self): def __call__(self, fun_dim=None, discretize=None): """Get all target values for the respective function and dimension - and reference aRT values (passed during initialization). `fun_dim` + and reference ERT values (passed during initialization). `fun_dim` is a tuple ``(fun_nb, dimension)`` like ``(1, 20)`` for the 20-D sphere. @@ -343,7 +343,7 @@ def __call__(self, fun_dim=None, discretize=None): where f are the values of the ``DataSet`` ``target`` attribute. The next difficult target is chosen not smaller as target / 10**0.2. - Returned are the aRT for targets that, within the given budget, the + Returned are the ERT for targets that, within the given budget, the reference algorithm just failed to achieve. 
""" @@ -397,7 +397,7 @@ def __call__(self, fun_dim=None, discretize=None): targets = [] if genericsettings.test: for rl in self.run_lengths: - # choose largest target not achieved by reference aRT + # choose largest target not achieved by reference ERT indices = np.nonzero(ds.ert[:end] > np.max((1, rl * (fun_dim[1] if self.times_dimension else 1))))[0] if len(indices): # larger ert exists targets.append(np.max((ds.target[indices[0]], # first missed target @@ -429,7 +429,7 @@ def __call__(self, fun_dim=None, discretize=None): old_targets = targets targets = [] for rl in self.run_lengths: - # choose best target achieved by reference aRT times step_to_next_difficult_target + # choose best target achieved by reference ERT times step_to_next_difficult_target indices = np.nonzero(ds.ert[:end] <= np.max((1, rl * (fun_dim[1] if self.times_dimension else 1))))[0] if not len(indices): warnings.warn(' too easy run length ' + str(rl) + @@ -667,7 +667,7 @@ class DataSet(object): Number of trials: 15 Final target Df: 1e-08 min / max number of evals per trial: 5676 / 6346 - evals/DIM: best 15% 50% 85% max | aRT/DIM nsucc + evals/DIM: best 15% 50% 85% max | ERT/DIM nsucc ---Df---|-----------------------------------------|---------------- 1.0e+03 | 102 126 170 205 235 | 164.2 15 1.0e+01 | 278 306 364 457 480 | 374.5 15 @@ -678,7 +678,7 @@ class DataSet(object): >>> import numpy as np >>> idx = list(range(0, 50, 10)) + [-1] - >>> # get aRT average runtime for some targets + >>> # get ERT (expected running time) for some targets >>> t = np.array([idx, ds.target[idx], ds.ert[idx]]).T # doctest:+ELLIPSIS >>> assert t[0][0] == 0 >>> assert t[0][2] == 1 @@ -997,7 +997,7 @@ def __init__(self, header, comment, data, indexfile): '*.info file and in the data files.') self._cut_data() - # Compute aRT + # Compute ERT self.computeERTfromEvals() @property @@ -1249,7 +1249,7 @@ def info(self, targets=None): sinfo += '\nFinal target Df: ' + str(self.precision) # sinfo += '\nmin / max number of evals: ' + str(int(min(self.evals[0]))) + ' / ' + str(int(max(self.maxevals))) sinfo += '\nmin / max number of evals per trial: ' + str(int(min(self.maxevals))) + ' / ' + str(int(max(self.maxevals))) - sinfo += '\n evals/DIM: best 15% 50% 85% max | aRT/DIM nsucc' + sinfo += '\n evals/DIM: best 15% 50% 85% max | ERT/DIM nsucc' sinfo += '\n ---Df---|-----------------------------------------|----------------' evals = self.detEvals(targets, copy=False) nsucc = self.detSuccesses(targets) @@ -1464,13 +1464,13 @@ def generateRLData(self, targets): # alternative output sorted by targets def detAverageEvals(self, targets): - """Determine the average number of f-evals for each target + """Determine the average number of f-evals for each target in ``targets`` list. If a target is not reached within trial itrail, self.maxevals[itrial] contributes to the average. - Equals to sum(evals(target)) / nbruns. If aRT is finite - this equals to aRT * psucc == (sum(evals) / ntrials / psucc) * psucc, - where aRT, psucc, and evals are a function of target. + Equals to sum(evals(target)) / nbruns. If ERT is finite + this equals to ERT * psucc == (sum(evals) / ntrials / psucc) * psucc, + where ERT, psucc, and evals are a function of target. """ assert not any(np.isnan(self.evals[:][0])) # target value cannot be nan @@ -1511,12 +1511,12 @@ def detSuccessRates(self, targets): return np.array(self.detSuccesses(targets)) / float(self.nbRuns()) def detERT(self, targets): - """Determine the average running time to reach target values. 
+ """Determine the expected running time (ERT) to reach target values. The value is numpy.inf, if the target was never reached. :keyword list targets: target function values of interest - :returns: list of average running times (# f-evals) for the + :returns: list of expected running times (# f-evals) for the respective targets. """ @@ -1541,7 +1541,7 @@ def detERT(self, targets): break res[t] = prevline.copy() # is copy necessary? Yes. - # Return a list of aRT corresponding to the input targets in + # Return a list of ERT corresponding to the input targets in # targets, sorted along targets return list(res[i][1] for i in targets) @@ -2421,7 +2421,7 @@ def run_length_distributions(self, dimension, target_values, def get_all_data_lines(self, target_value, fct, dim): """return a list of all data lines in ``self`` for each algorithm and a list of the respective - computed aRTs. + computed ERTs. Example ------- diff --git a/code-postprocessing/cocopp/pptable.py b/code-postprocessing/cocopp/pptable.py index 7a57578ea..cbfea1d7c 100644 --- a/code-postprocessing/cocopp/pptable.py +++ b/code-postprocessing/cocopp/pptable.py @@ -3,9 +3,9 @@ """Module for generating tables used by rungeneric1.py. -The generated tables give the aRT and in brackets the 10th to 90th +The generated tables give the ERT and in brackets the 10th to 90th percentile range divided by two of 100 simulated runs divided by the -aRT of a reference algorithm (given in the respective first row and as +ERT of a reference algorithm (given in the respective first row and as indicated in testbedsettings.py) for different target precisions for different functions. If the reference algorithm did not reach the target precision, the absolute values are @@ -39,7 +39,7 @@ # tspec = {'col%d' % i: {'what': 'fname', 'header': r'$\Delta f$', 'format': None}} # for t in targets: # i = i + 1 -# tspec.update({'col%d' % i: {'what': 'aRT ratio for df=%e' % t, +# tspec.update({'col%d' % i: {'what': 'ERT ratio for df=%e' % t, # 'header': r'\multicolumn{2}{@{}c@{}}{1e%+d}' % (int(np.log10(t)), # 'format': writeFEval}}) # i = i + 1 @@ -55,11 +55,11 @@ def get_table_caption(): """ table_caption_start = r"""% - Average running time (\aRT\ in number of function - evaluations) divided by the \aRT\ of !!THE-REF-ALG!! in #1. This \aRT\ + Expected running time (\ERT\ in number of function + evaluations) divided by the \ERT\ of !!THE-REF-ALG!! in #1. This \ERT\ ratio and, in braces as dispersion measure, the half difference between 90 and 10\%-tile of bootstrapped run lengths appear in the second row of each cell, - the best \aRT\ + the best \ERT\ """ table_caption_rlbased = (r"""% in the first. The different target !!DF!!-values are shown in the top row. @@ -80,8 +80,8 @@ def get_table_caption(): functions (!!TOTAL-NUM-OF-FUNCTIONS!!).\cocoversion """ table_caption_no_reference_algorithm = r"""% - Average runtime (\aRT) to reach given targets, measured - in number of function evaluations in #1. For each function, the \aRT\ + Expected runtime (\ERT) to reach given targets, measured + in number of function evaluations in #1. For each function, the \ERT\ and, in braces as dispersion measure, the half difference between 10 and 90\%-tile of (bootstrapped) runtimes is shown for the different target !!DF!!-values as shown in the top row. @@ -109,7 +109,7 @@ def get_table_caption(): def main(dsList, dims_of_interest, outputdir, latex_commands_file): - """Generate a table of ratio aRT/aRTref vs target precision. 
+ """Generate a table of ratio ERT/ERTref vs target precision. 1 table per dimension will be generated. @@ -261,7 +261,7 @@ def main(dsList, dims_of_interest, outputdir, latex_commands_file): evals = np.array(sorted(evals))[0:min(len(evals), len(refevals))] refevals = np.array(sorted(refevals))[0:min(len(evals), len(refevals))] - #The conditions for significance are now that aRT < aRT_ref and + #The conditions for significance are now that ERT < ERT_ref and # all(sorted(FEvals_ref) > sorted(FEvals_current)). if ((nbtests * p) < 0.05 and ert - refalgdata[i] < 0. and z < 0. and (np.isinf(refalgdata[i]) or all(evals < refevals))): diff --git a/code-postprocessing/cocopp/pptex.py b/code-postprocessing/cocopp/pptex.py index ffb4b11ee..9658f1c48 100644 --- a/code-postprocessing/cocopp/pptex.py +++ b/code-postprocessing/cocopp/pptex.py @@ -257,7 +257,7 @@ def writeFEvalsMaxSymbols(fevals, maxsymbols, isscientific=False): 2) float notation :returns: string representation of a number of function evaluations - or aRT. + or ERT. """ @@ -326,7 +326,7 @@ def writeFEvalsMaxPrec(entry, SIG, maxfloatrepr=1e5): exponent) :returns: string representation of a number of function evaluations - or aRT. + or ERT. """ #CAVE: what if entry is smaller than 10**(-SIG)? diff --git a/code-postprocessing/cocopp/preparetexforhtml.py b/code-postprocessing/cocopp/preparetexforhtml.py index 583ee7bdd..e60d64e05 100644 --- a/code-postprocessing/cocopp/preparetexforhtml.py +++ b/code-postprocessing/cocopp/preparetexforhtml.py @@ -29,7 +29,7 @@ % pre-defined commands \\newcommand{\\DIM}{\ensuremath{\mathrm{DIM}}} -\\newcommand{\\aRT}{\ensuremath{\mathrm{aRT}}} +\\newcommand{\\ERT}{\ensuremath{\mathrm{ERT}}} \\newcommand{\\FEvals}{\ensuremath{\mathrm{FEvals}}} \\newcommand{\\nruns}{\ensuremath{\mathrm{Nruns}}} \\newcommand{\\Dfb}{\ensuremath{\Delta f_{\mathrm{best}}}} @@ -122,10 +122,10 @@ def main(latex_commands_for_html): # 7. pplogloss f.writelines(prepare_providecommand('bbobloglosstablecaption', scenario, - pplogloss.table_caption().replace('Figure~\\ref{fig:aRTlogloss}', + pplogloss.table_caption().replace('Figure~\\ref{fig:ERTlogloss}', 'the following figure'))) f.writelines(prepare_providecommand('bbobloglossfigurecaption', scenario, - pplogloss.figure_caption().replace('Figure~\\ref{tab:aRTloss}', + pplogloss.figure_caption().replace('Figure~\\ref{tab:ERTloss}', 'the previous figure'))) # prepare tags for later HTML preparation diff --git a/code-postprocessing/cocopp/rungeneric1.py b/code-postprocessing/cocopp/rungeneric1.py index 07154209e..a13164baa 100644 --- a/code-postprocessing/cocopp/rungeneric1.py +++ b/code-postprocessing/cocopp/rungeneric1.py @@ -85,8 +85,8 @@ def main(argv=None): The default setting is "color". --tab-only, --fig-only, --rld-only, --los-only these options can be used to output respectively the TeX - tables, convergence and aRTs graphs figures, run length - distribution figures, aRT loss ratio figures only. A + tables, convergence and ERTs graphs figures, run length + distribution figures, ERT loss ratio figures only. A combination of any two of these options results in no output. --conv @@ -349,7 +349,7 @@ def main(argv=None): values_of_interest = testbedsettings.current_testbed.ppfigdim_target_values if prepare_figures: print("Scaling figures...") - # aRT/dim vs dim. + # ERT/dim vs dim. 
#plt.rc("axes", **inset.rcaxeslarger) #plt.rc("xtick", **inset.rcticklarger) #plt.rc("ytick", **inset.rcticklarger) @@ -440,7 +440,7 @@ def main(argv=None): print_done() if prepare_log_loss: - print("aRT loss ratio figures and tables...") + print("ERT loss ratio figures and tables...") for ng, sliceNoise in dsList.dictByNoise().items(): if ng == 'noiselessall': testbed = 'noiseless' diff --git a/code-postprocessing/cocopp/rungenericmany.py b/code-postprocessing/cocopp/rungenericmany.py index 26fbef492..12ada97fc 100644 --- a/code-postprocessing/cocopp/rungenericmany.py +++ b/code-postprocessing/cocopp/rungenericmany.py @@ -112,7 +112,7 @@ def main(argv=None): --tab-only, --rld-only, --fig-only these options can be used to output respectively the comparison TeX tables, the run lengths distributions or the - figures of aRT/dim vs dim only. A combination of any two or + figures of ERT/dim vs dim only. A combination of any two or more of these options results in no output. --no-rld-single-fcts do not generate runlength distribution figures for each @@ -408,7 +408,7 @@ def main(argv=None): parentFileName=genericsettings.many_algorithm_file_name ) - # ECDFs of aRT ratios + # ECDFs of ERT ratios dic_dim0 = ds_list0.dictByDim() dic_dim1 = ds_list1.dictByDim() for dim in set(dic_dim0.keys()) & set(dic_dim1.keys()): diff --git a/code-postprocessing/cocopp/toolsstats.py b/code-postprocessing/cocopp/toolsstats.py index 1cf8567f1..ae5ee0f37 100644 --- a/code-postprocessing/cocopp/toolsstats.py +++ b/code-postprocessing/cocopp/toolsstats.py @@ -118,8 +118,8 @@ def sp1(data, maxvalue=np.Inf, issuccessful=None): def sp(data, maxvalue=np.Inf, issuccessful=None, allowinf=True): """sp(data, issuccessful=None) computes the sum of the function evaluations over all runs divided by the number of success, - the so-called success performance which estimates the average - runtime aRT. + the so-called success performance which estimates the expected + runtime ERT. Input: data -- array contains, e.g., number of function @@ -185,8 +185,8 @@ def drawSP_from_dataset(data_set, ftarget, percentiles, samplesize=genericsettin idx_nan = np.isnan(evals) # nan == did not reach ftarget return drawSP(evals[~idx_nan], data_set.maxevals[idx_nan], percentiles, samplesize) - The expected value of ``all_sampled_values_sorted`` is the average - runtime aRT, as obtained by ``data_set.detERT([ftarget])[0]``. + The expected value of ``all_sampled_values_sorted`` is the expected + runtime ERT, as obtained by ``data_set.detERT([ftarget])[0]``. 
""" try: From 60c443ff2b3cfdd2e352a46cfa94f895bb783bec Mon Sep 17 00:00:00 2001 From: brockhof Date: Tue, 11 Feb 2020 10:48:35 +0100 Subject: [PATCH 2/4] removed bug and turned off the changes in the xticks of pprldistr2.py (potentially still wrong, but should at least let the nightly tests pass again) --- .../cocopp/comp2/pprldistr2.py | 38 +++++-------------- 1 file changed, 10 insertions(+), 28 deletions(-) diff --git a/code-postprocessing/cocopp/comp2/pprldistr2.py b/code-postprocessing/cocopp/comp2/pprldistr2.py index 176fa2775..1c537bad9 100644 --- a/code-postprocessing/cocopp/comp2/pprldistr2.py +++ b/code-postprocessing/cocopp/comp2/pprldistr2.py @@ -39,12 +39,12 @@ def beautify(handles): axisHandle = plt.gca() axisHandle.set_xscale('log') plt.axvline(1, ls='-', color='k'); # symmetry line for ERT1/ERT0 = 1 - xlim = min(max(numpy.abs(numpy.log10(plt.xlim()))), - numpy.ceil(numpy.log10(sys.float_info.max))-1) # correction of - # numerical issues - # with bbob-biobj - # test - xlim = (min(0.1, 10.**(-xlim)), max(10., 10.**(xlim))) + + xlim = numpy.minimum(numpy.maximum(plt.xlim(), 1e-9), 1e9) + xlim = numpy.minimum(numpy.maximum(xlim, 10), 0.1) + xlim = max(numpy.abs(numpy.log10(xlim))) + xlim = (10 ** (-xlim), 10 ** xlim) + plt.axhline(0.5, ls=':', color='k', lw=2) # symmetry line at y=0.5 plt.xlim(xlim) plt.yticks(numpy.array((0., 0.25, 0.5, 0.75, 1.0)), @@ -53,18 +53,7 @@ def beautify(handles): axisHandle.set_xlabel('log10 of FEvals ratio') axisHandle.set_ylabel('proportion of trials') axisHandle.grid(True) - xticks = axisHandle.get_xticks() - newxticks = [] - for i in xticks: - if i > 0 and i < numpy.inf: - newxticks.append('%d' % round(numpy.log10(i))) - axisHandle.set_xticklabels(newxticks) - - print('......................') - print('old xticks: ') - print(xticks) - print('new xticks: ') - print(newxticks) + # Prolong to the boundary... xmin, xmax = plt.xlim() @@ -89,19 +78,12 @@ def beautify(handles): # Inverted xticks x = axisHandle.get_xticks() - print('xticks old:') - print(x) - # Operation for reverting the ticks for x < 1 - x[x<1] = sorted(1/(x[x<1]*numpy.power(10, -2*numpy.floor(numpy.log10(x[x<1]))-1))) - x = x[(xxmin)] # why? - axisHandle.set_xticks(x) + #x[x<1] = sorted(1/(x[x<1]*numpy.power(10, -2*numpy.floor(numpy.log10(x[x<1]))-1))) + #x = x[(xxmin)] # why? + #axisHandle.set_xticks(x) - print('xticks new:') - print(x) - if not len(x) == len(newxticks): - 1/0 def computeERT(fevals, maxevals): From 09fb93ba765e6dcbe008954a1457d8e2cc2fec71 Mon Sep 17 00:00:00 2001 From: brockhof Date: Tue, 25 Feb 2020 14:15:25 +0100 Subject: [PATCH 3/4] changed maintainer to @nikohansen and myself in setup.py.in (was Dejan before) --- code-postprocessing/setup.py.in | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code-postprocessing/setup.py.in b/code-postprocessing/setup.py.in index 05a434e23..7803d45d4 100644 --- a/code-postprocessing/setup.py.in +++ b/code-postprocessing/setup.py.in @@ -32,8 +32,8 @@ setup( ]}, url = 'https://github.com/numbbo/coco', license = 'BSD', - maintainer = 'Dejan Tusar', - maintainer_email = 'dejan.tusar@inria.fr', + maintainer = 'Dimo Brockhoff and Nikolaus Hansen', + maintainer_email = 'dimo.brockhoff@inria.fr', # author = ['Nikolaus Hansen', 'Raymond Ros', 'Dejan Tusar'], description = 'Benchmarking framework for all types of black-box optimization algorithms, postprocessing. 
', long_description = '...',

From e6736962640db9200c2de21f5ee39a900d259e46 Mon Sep 17 00:00:00 2001
From: brockhof
Date: Tue, 24 Mar 2020 16:37:19 +0100
Subject: [PATCH 4/4] corrected issues #1939 and #1550 (code written and
 proof-read jointly by @nikohansen, @ttusar, @kostasvar)

---
 .../cocopp/comp2/pprldistr2.py | 20 ++++++++-----------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/code-postprocessing/cocopp/comp2/pprldistr2.py b/code-postprocessing/cocopp/comp2/pprldistr2.py
index 1c537bad9..db67c9d5a 100644
--- a/code-postprocessing/cocopp/comp2/pprldistr2.py
+++ b/code-postprocessing/cocopp/comp2/pprldistr2.py
@@ -40,8 +40,9 @@ def beautify(handles):
     axisHandle.set_xscale('log')
     plt.axvline(1, ls='-', color='k');  # symmetry line for ERT1/ERT0 = 1
-    xlim = numpy.minimum(numpy.maximum(plt.xlim(), 1e-9), 1e9)
-    xlim = numpy.minimum(numpy.maximum(xlim, 10), 0.1)
+    xlim = plt.xlim()
+    xlim = numpy.maximum(xlim[0], 1e-9), numpy.minimum(xlim[1], 1e9)
+    xlim = numpy.minimum(xlim[0], 1. / 10.01), numpy.maximum(xlim[1], 10.01)
     xlim = max(numpy.abs(numpy.log10(xlim)))
     xlim = (10 ** (-xlim), 10 ** xlim)
@@ -66,7 +67,7 @@
         if len(xdata) == 0 or len(ydata) == 0:
             continue
         if not hasattr(xdata, 'dtype') or xdata.dtype != float:
-        xdata = numpy.array(xdata, dtype=float)
+            xdata = numpy.array(xdata, dtype=float)
         xdata = numpy.insert(xdata, 0, xmin)
         xdata = numpy.insert(xdata, len(xdata), xmax)
         ydata = numpy.insert(ydata, 0, ydata[0])
@@ -75,15 +76,10 @@
     toolsdivers.legend(loc='best')
-    # Inverted xticks
-    x = axisHandle.get_xticks()
-
-    # Operation for reverting the ticks for x < 1
-    #x[x<1] = sorted(1/(x[x<1]*numpy.power(10, -2*numpy.floor(numpy.log10(x[x<1]))-1)))
-    #x = x[(x<xmax) * (x>xmin)] # why?
-    #axisHandle.set_xticks(x)
-
+    x = numpy.asarray(axisHandle.get_xticks())
+    axisHandle.set_xticklabels([str(int(numpy.log10(xx))) for xx in x])
+    axisHandle.set_xticks(x)
+    plt.xlim(xlim)

 def computeERT(fevals, maxevals):
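A closing note on the quantity this patch renames throughout: the ERT estimator that, e.g., toolsstats.sp and DataSet.detERT compute is the total number of function evaluations spent, where unsuccessful trials contribute their full maxevals, divided by the number of successful trials. A minimal standalone sketch (illustrative names, not the computeERT function above):

    import numpy as np

    def expected_running_time(evals, maxevals):
        # evals: evaluations to reach the target per trial, np.nan if missed
        # maxevals: total evaluations performed in each trial
        evals = np.asarray(evals, dtype=float)
        success = ~np.isnan(evals)
        if not success.any():
            return np.inf  # target never reached in any trial
        spent = np.where(success, evals, maxevals)  # failed trials count fully
        return spent.sum() / success.sum()

    # 2 successes out of 3 trials: (120 + 1000 + 300) / 2 = 710
    print(expected_running_time([120, np.nan, 300], maxevals=[1000, 1000, 1000]))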