From eca4b09e1a4913a7bd874a96c5cb7b5eaa6f51f3 Mon Sep 17 00:00:00 2001
From: Jonas Rauber
Date: Thu, 27 Sep 2018 09:20:58 +0000
Subject: [PATCH 1/6] support for early stopping when reaching a certain perturbation size

---
 foolbox/adversarial.py  | 29 +++++++++++++++++++++++++++++
 foolbox/attacks/base.py | 24 +++++++++++++++++++++---
 2 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/foolbox/adversarial.py b/foolbox/adversarial.py
index aba527fa..b350fb28 100644
--- a/foolbox/adversarial.py
+++ b/foolbox/adversarial.py
@@ -6,9 +6,16 @@
 import numpy as np
 import numbers
 
+from .distances import Distance
 from .distances import MSE
 
 
+class StopAttack(Exception):
+    """Exception thrown to request early stopping of an attack
+    if a given (optional!) threshold is reached."""
+    pass
+
+
 class Adversarial(object):
     """Defines an adversarial that should be found and stores the result.
 
@@ -29,6 +36,13 @@ class Adversarial(object):
         The ground-truth label of the original image.
     distance : a :class:`Distance` class
         The measure used to quantify similarity between images.
+    threshold : float or :class:`Distance`
+        If not None, the attack will stop as soon as the adversarial
+        perturbation has a size smaller than this threshold. Can be
+        an instance of the :class:`Distance` class passed to the distance
+        argument, or a float assumed to have the same unit as
+        the given distance. If None, the attack will simply minimize
+        the distance as well as possible.
     """
 
     def __init__(
@@ -38,6 +52,7 @@ def __init__(
             original_image,
             original_class,
             distance=MSE,
+            threshold=None,
             verbose=False):
 
         self.__model = model
@@ -46,6 +61,11 @@ def __init__(
         self.__original_image_for_distance = original_image
         self.__original_class = original_class
         self.__distance = distance
+
+        if threshold is not None and not isinstance(threshold, Distance):
+            threshold = distance(value=threshold)
+        self.__threshold = threshold
+
         self.verbose = verbose
 
         self.__best_adversarial = None
@@ -152,6 +172,12 @@ def normalized_distance(self, image):
             image,
             bounds=self.bounds())
 
+    def reached_threshold(self):
+        """Returns True if a threshold is given and the currently
+        best adversarial distance is smaller than the threshold."""
+        return self.__threshold is not None \
+            and self.__best_distance <= self.__threshold
+
     def __new_adversarial(self, image, predictions, in_bounds):
         image = image.copy()  # to prevent accidental inplace changes
         distance = self.normalized_distance(image)
@@ -167,6 +193,9 @@ def __new_adversarial(self, image, predictions, in_bounds):
             self._best_prediction_calls = self._total_prediction_calls
             self._best_gradient_calls = self._total_gradient_calls
 
+            if self.reached_threshold():
+                raise StopAttack
+
             return True, distance
         return False, distance
 
diff --git a/foolbox/attacks/base.py b/foolbox/attacks/base.py
index 0d7f146b..e3e4bb80 100644
--- a/foolbox/attacks/base.py
+++ b/foolbox/attacks/base.py
@@ -1,4 +1,5 @@
 import warnings
+import logging
 import functools
 import sys
 import abc
@@ -10,6 +11,7 @@
     ABC = abc.ABCMeta('ABC', (), {})
 
 from ..adversarial import Adversarial
+from ..adversarial import StopAttack
 from ..criteria import Misclassification
 
 
@@ -85,7 +87,14 @@ def wrapper(self, input_or_adv, label=None, unpack=True, **kwargs):
                                  ' with a model and a criterion or it'
                                  ' needs to be called with an Adversarial'
                                  ' instance.')
-            a = Adversarial(model, criterion, input_or_adv, label)
+            try:
+                a = Adversarial(model, criterion, input_or_adv, label)
+            except StopAttack:
+                # during initialization, the original input is checked;
+                # if a threshold is specified and the original input is
+                # misclassified, this can already cause a StopAttack
+                # exception
+                assert a.distance.value == 0.
 
         assert a is not None
 
@@ -93,9 +102,18 @@ def wrapper(self, input_or_adv, label=None, unpack=True, **kwargs):
             warnings.warn('Not running the attack because the original input'
                           ' is already misclassified and the adversarial thus'
                           ' has a distance of 0.')
+        elif a.reached_threshold():
+            warnings.warn('Not running the attack because the given threshold'
+                          ' is already reached')
         else:
-            _ = call_fn(self, a, label=None, unpack=None, **kwargs)
-            assert _ is None, 'decorated __call__ method must return None'
+            try:
+                _ = call_fn(self, a, label=None, unpack=None, **kwargs)
+                assert _ is None, 'decorated __call__ method must return None'
+            except StopAttack:
+                # if a threshold is specified, StopAttack will be thrown
+                # when the threshold is reached; thus we can do early
+                # stopping of the attack
+                logging.info('threshold reached, stopping attack')
 
         if a.image is None:
             warnings.warn('{} did not find an adversarial, maybe the model'

From be410bf7cd988f3e5c2fc72ad337dc048bd5d7b5 Mon Sep 17 00:00:00 2001
From: Jonas Rauber
Date: Thu, 27 Sep 2018 11:20:54 +0000
Subject: [PATCH 2/6] added tests and more documentation

---
 foolbox/adversarial.py        |  6 ++++-
 foolbox/tests/test_attacks.py | 51 +++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/foolbox/adversarial.py b/foolbox/adversarial.py
index b350fb28..29958506 100644
--- a/foolbox/adversarial.py
+++ b/foolbox/adversarial.py
@@ -42,7 +42,11 @@ class Adversarial(object):
         an instance of the :class:`Distance` class passed to the distance
         argument, or a float assumed to have the same unit as
         the given distance. If None, the attack will simply minimize
-        the distance as well as possible.
+        the distance as well as possible. Note that the threshold only
+        influences early stopping of the attack; the returned adversarial
+        does not necessarily have smaller perturbation size than this
+        threshold; the `reached_threshold()` method can be used to check
+        if the threshold has been reached. 
""" def __init__( diff --git a/foolbox/tests/test_attacks.py b/foolbox/tests/test_attacks.py index cc427fea..aa0158a3 100644 --- a/foolbox/tests/test_attacks.py +++ b/foolbox/tests/test_attacks.py @@ -57,3 +57,54 @@ def test_base_attack(model, criterion, image, label): attack = attacks.FGSM() with pytest.raises(ValueError): attack(image, label=wrong_label) + + +def test_early_stopping(model, criterion, image, label): + attack = attacks.FGSM() + + adv = Adversarial(model, criterion, image, label) + attack(adv) + assert adv.distance.value == 0 + assert not adv.reached_threshold() # because no threshold specified + + wrong_label = label + 1 + + adv = Adversarial(model, criterion, image, wrong_label) + attack(adv) + assert adv.distance.value > 0 + assert not adv.reached_threshold() # because no threshold specified + + c = adv._total_prediction_calls + d = adv.distance.value + large_d = 10 * d + small_d = d / 2 + + adv = Adversarial(model, criterion, image, wrong_label, + threshold=adv._distance(value=large_d)) + attack(adv) + assert 0 < adv.distance.value <= large_d + assert adv.reached_threshold() + assert adv._total_prediction_calls < c + + adv = Adversarial(model, criterion, image, wrong_label, + threshold=large_d) + attack(adv) + assert 0 < adv.distance.value <= large_d + assert adv.reached_threshold() + assert adv._total_prediction_calls < c + + adv = Adversarial(model, criterion, image, wrong_label, + threshold=small_d) + attack(adv) + assert small_d < adv.distance.value <= large_d + assert not adv.reached_threshold() + assert adv._total_prediction_calls == c + assert adv.distance.value == d + + adv = Adversarial(model, criterion, image, wrong_label, + threshold=adv._distance(value=large_d)) + attack(adv) + assert adv.reached_threshold() + c = adv._total_prediction_calls + attack(adv) + assert adv._total_prediction_calls == c # no new calls From 78a7a78137eb0b8c2fde7e073c2b77eef638130c Mon Sep 17 00:00:00 2001 From: Jonas Rauber Date: Thu, 27 Sep 2018 11:35:12 +0000 Subject: [PATCH 3/6] fixed tests --- foolbox/tests/test_attacks.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/foolbox/tests/test_attacks.py b/foolbox/tests/test_attacks.py index aa0158a3..e93ec72b 100644 --- a/foolbox/tests/test_attacks.py +++ b/foolbox/tests/test_attacks.py @@ -59,17 +59,21 @@ def test_base_attack(model, criterion, image, label): attack(image, label=wrong_label) -def test_early_stopping(model, criterion, image, label): +def test_early_stopping(bn_model, bn_criterion, bn_image, bn_label): attack = attacks.FGSM() - adv = Adversarial(model, criterion, image, label) + model = bn_model + criterion = bn_criterion + image = bn_image + label = bn_label + + wrong_label = label + 1 + adv = Adversarial(model, criterion, image, wrong_label) attack(adv) assert adv.distance.value == 0 assert not adv.reached_threshold() # because no threshold specified - wrong_label = label + 1 - - adv = Adversarial(model, criterion, image, wrong_label) + adv = Adversarial(model, criterion, image, label) attack(adv) assert adv.distance.value > 0 assert not adv.reached_threshold() # because no threshold specified @@ -79,21 +83,21 @@ def test_early_stopping(model, criterion, image, label): large_d = 10 * d small_d = d / 2 - adv = Adversarial(model, criterion, image, wrong_label, + adv = Adversarial(model, criterion, image, label, threshold=adv._distance(value=large_d)) attack(adv) assert 0 < adv.distance.value <= large_d assert adv.reached_threshold() assert adv._total_prediction_calls < c - adv 
= Adversarial(model, criterion, image, wrong_label,
+    adv = Adversarial(model, criterion, image, label,
                       threshold=large_d)
     attack(adv)
     assert 0 < adv.distance.value <= large_d
     assert adv.reached_threshold()
     assert adv._total_prediction_calls < c
 
-    adv = Adversarial(model, criterion, image, wrong_label,
+    adv = Adversarial(model, criterion, image, label,
                       threshold=small_d)
     attack(adv)
     assert small_d < adv.distance.value <= large_d
@@ -101,7 +105,7 @@ def test_early_stopping(model, criterion, image, label):
     assert adv._total_prediction_calls == c
     assert adv.distance.value == d
 
-    adv = Adversarial(model, criterion, image, wrong_label,
+    adv = Adversarial(model, criterion, image, label,
                       threshold=adv._distance(value=large_d))
     attack(adv)
     assert adv.reached_threshold()

From 3736a06f867fd53d27e78a5e55cf0d0340d3f9e1 Mon Sep 17 00:00:00 2001
From: Jonas Rauber
Date: Thu, 27 Sep 2018 13:19:23 +0000
Subject: [PATCH 4/6] support directly specifying distance and threshold during Attack init

---
 foolbox/attacks/adef_attack.py     |  4 +--
 foolbox/attacks/base.py            | 50 +++++++++++++++++++++++++-----
 foolbox/attacks/boundary_attack.py |  4 ---
 3 files changed, 43 insertions(+), 15 deletions(-)

diff --git a/foolbox/attacks/adef_attack.py b/foolbox/attacks/adef_attack.py
index 71de9e92..04481ac6 100644
--- a/foolbox/attacks/adef_attack.py
+++ b/foolbox/attacks/adef_attack.py
@@ -6,7 +6,6 @@
 
 from .base import Attack
 from .base import call_decorator
-from ..criteria import Misclassification
 
 
 def _transpose_image(image):
@@ -176,8 +175,7 @@ class ADefAttack(Attack):
     .. [2]_ https://gitlab.math.ethz.ch/tandrig/ADef/tree/master
 
     """
-    def __init__(self, model=None, criterion=Misclassification()):
-        super(ADefAttack, self).__init__(model=model, criterion=criterion)
+    def _initialize(self):
         self.vector_field = None
 
     @call_decorator
diff --git a/foolbox/attacks/base.py b/foolbox/attacks/base.py
index e3e4bb80..cc8490f9 100644
--- a/foolbox/attacks/base.py
+++ b/foolbox/attacks/base.py
@@ -13,6 +13,7 @@
 from ..adversarial import Adversarial
 from ..adversarial import StopAttack
 from ..criteria import Misclassification
+from ..distances import MSE
 
 
 class Attack(ABC):
@@ -24,12 +25,27 @@ class Attack(ABC):
 
     Parameters
     ----------
-    model : :class:`adversarial.Model`
-        The default model to which the attack is applied if it is not called
-        with an :class:`Adversarial` instance.
-    criterion : :class:`adversarial.Criterion`
-        The default criterion that defines what is adversarial if the attack
-        is not called with an :class:`Adversarial` instance.
+    model : a :class:`Model` instance
+        The model that should be fooled by the adversarial.
+        Ignored if the attack is called with an :class:`Adversarial` instance.
+    criterion : a :class:`Criterion` instance
+        The criterion that determines which images are adversarial.
+        Ignored if the attack is called with an :class:`Adversarial` instance.
+    distance : a :class:`Distance` class
+        The measure used to quantify similarity between images.
+        Ignored if the attack is called with an :class:`Adversarial` instance.
+    threshold : float or :class:`Distance`
+        If not None, the attack will stop as soon as the adversarial
+        perturbation has a size smaller than this threshold. Can be
+        an instance of the :class:`Distance` class passed to the distance
+        argument, or a float assumed to have the same unit as
+        the given distance. If None, the attack will simply minimize
+        the distance as well as possible. 
Note that the threshold only + influences early stopping of the attack; the returned adversarial + does not necessarily have smaller perturbation size than this + threshold; the `reached_threshold()` method can be used to check + if the threshold has been reached. + Ignored if the attack is called with an :class:`Adversarial` instance. Notes ----- @@ -38,9 +54,24 @@ class Attack(ABC): """ - def __init__(self, model=None, criterion=Misclassification()): + def __init__(self, + model=None, criterion=Misclassification(), + distance=MSE, threshold=None): self._default_model = model self._default_criterion = criterion + self._default_distance = distance + self._default_threshold = threshold + + # to customize the initialization in subclasses, please + # try to overwrite _initialize instead of __init__ if + # possible + self._initialize() + + def _initialize(self): + """Additional initializer that can be overwritten by + subclasses without redefining the full __init__ method + including all arguments and documentation.""" + pass @abstractmethod def __call__(self, input_or_adv, label=None, unpack=True, **kwargs): @@ -82,13 +113,16 @@ def wrapper(self, input_or_adv, label=None, unpack=True, **kwargs): else: model = self._default_model criterion = self._default_criterion + distance = self._default_distance + threshold = self._default_threshold if model is None or criterion is None: raise ValueError('The attack needs to be initialized' ' with a model and a criterion or it' ' needs to be called with an Adversarial' ' instance.') try: - a = Adversarial(model, criterion, input_or_adv, label) + a = Adversarial(model, criterion, input_or_adv, label, + distance=distance, threshold=threshold) except StopAttack: # during initialization, the original input is checked; # if a threshold is specified and the original input is diff --git a/foolbox/attacks/boundary_attack.py b/foolbox/attacks/boundary_attack.py index 95f03914..a1ba7f0b 100644 --- a/foolbox/attacks/boundary_attack.py +++ b/foolbox/attacks/boundary_attack.py @@ -16,7 +16,6 @@ from .base import Attack from .base import call_decorator from .blended_noise import BlendedUniformNoiseAttack -from ..criteria import Misclassification import numpy as np from numpy.linalg import norm @@ -52,9 +51,6 @@ class BoundaryAttack(Attack): """ - def __init__(self, model=None, criterion=Misclassification()): - super(BoundaryAttack, self).__init__(model=model, criterion=criterion) - @call_decorator def __call__( self, From 30abeb1c15cc8eccddd0b37044faced3db23898b Mon Sep 17 00:00:00 2001 From: Jonas Rauber Date: Thu, 27 Sep 2018 13:20:18 +0000 Subject: [PATCH 5/6] additional test --- foolbox/tests/test_attacks.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/foolbox/tests/test_attacks.py b/foolbox/tests/test_attacks.py index e93ec72b..23b1ee50 100644 --- a/foolbox/tests/test_attacks.py +++ b/foolbox/tests/test_attacks.py @@ -73,6 +73,11 @@ def test_early_stopping(bn_model, bn_criterion, bn_image, bn_label): assert adv.distance.value == 0 assert not adv.reached_threshold() # because no threshold specified + adv = Adversarial(model, criterion, image, wrong_label, threshold=1e10) + attack(adv) + assert adv.distance.value == 0 + assert adv.reached_threshold() + adv = Adversarial(model, criterion, image, label) attack(adv) assert adv.distance.value > 0 From dd743194781553e2643f3821fec237f8a9ae9775 Mon Sep 17 00:00:00 2001 From: Jonas Rauber Date: Thu, 27 Sep 2018 14:18:24 +0000 Subject: [PATCH 6/6] catch StopAttack exception in initialization earlier --- 
foolbox/adversarial.py | 8 +++++++- foolbox/attacks/base.py | 11 ++--------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/foolbox/adversarial.py b/foolbox/adversarial.py index 29958506..8167f64d 100644 --- a/foolbox/adversarial.py +++ b/foolbox/adversarial.py @@ -83,7 +83,13 @@ def __init__( self._best_gradient_calls = 0 # check if the original image is already adversarial - self.predictions(original_image) + try: + self.predictions(original_image) + except StopAttack: + # if a threshold is specified and the original input is + # misclassified, this can already cause a StopAttack + # exception + assert self.distance.value == 0. def _reset(self): self.__best_adversarial = None diff --git a/foolbox/attacks/base.py b/foolbox/attacks/base.py index cc8490f9..5e0b77c5 100644 --- a/foolbox/attacks/base.py +++ b/foolbox/attacks/base.py @@ -120,15 +120,8 @@ def wrapper(self, input_or_adv, label=None, unpack=True, **kwargs): ' with a model and a criterion or it' ' needs to be called with an Adversarial' ' instance.') - try: - a = Adversarial(model, criterion, input_or_adv, label, - distance=distance, threshold=threshold) - except StopAttack: - # during initialization, the original input is checked; - # if a threshold is specified and the original input is - # misclassified, this can already cause a StopAttack - # exception - assert a.distance.value == 0. + a = Adversarial(model, criterion, input_or_adv, label, + distance=distance, threshold=threshold) assert a is not None
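
Usage note (not part of the patch series itself): a minimal sketch of how the early-stopping API introduced above could look in practice. The model wrapper `fmodel`, the example `image`/`label`, and the threshold value 1e-4 are illustrative placeholders; only the `threshold`/`distance` arguments and `reached_threshold()` come from these patches.

    from foolbox.adversarial import Adversarial
    from foolbox.attacks import FGSM
    from foolbox.criteria import Misclassification
    from foolbox.distances import MSE

    # fmodel is assumed to be a foolbox model wrapping your network, and
    # image/label a correctly classified example; their setup is not shown.

    # Variant 1 (PATCH 4/6): pass distance and threshold to the attack itself;
    # the attack stops as soon as an adversarial with MSE <= 1e-4 is found.
    attack = FGSM(fmodel, Misclassification(), distance=MSE, threshold=1e-4)
    adversarial_image = attack(image, label=label)

    # Variant 2 (PATCH 1/6): pass the threshold to an Adversarial instance and
    # check afterwards whether early stopping was triggered.
    adv = Adversarial(fmodel, Misclassification(), image, label,
                      distance=MSE, threshold=1e-4)
    FGSM()(adv)
    print(adv.distance, adv.reached_threshold())

The threshold is interpreted in the unit of the given distance, so an MSE threshold of 1e-4 means the attack stops once the mean squared perturbation drops below that value; the returned adversarial is not guaranteed to be smaller than the threshold if the attack terminates without reaching it.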