johnmyleswhite · raffg · Jan 9, 2020 · Jan 9, 2020 · Jan 9, 2020
diff --git a/python/algorithms/epsilon_greedy/annealing.py b/python/algorithms/epsilon_greedy/annealing.py
@@ -1,10 +1,6 @@
 import random
 import math
 
-def ind_max(x):
-  m = max(x)
-  return x.index(m)
-
 class AnnealingEpsilonGreedy():
   def __init__(self, counts, values):
     self.counts = counts
@@ -21,7 +17,10 @@ def select_arm(self):
     epsilon = 1 / math.log(t + 0.0000001)
 
     if random.random() > epsilon:
-      return ind_max(self.values)
+      # Exploit: choose uniformly among the arms that share the top value.
+      # The maximum is computed once instead of once per arm inside the filter.
+      best = max(self.values)
+      return random.choice([i for i, v in enumerate(self.values) if v == best])
     else:
       return random.randrange(len(self.values))
 

diff --git a/python/algorithms/epsilon_greedy/standard.py b/python/algorithms/epsilon_greedy/standard.py
@@ -1,9 +1,5 @@
 import random
 
-def ind_max(x):
-  m = max(x)
-  return x.index(m)
-
 class EpsilonGreedy():
   def __init__(self, epsilon, counts, values):
     self.epsilon = epsilon
@@ -18,7 +14,15 @@ def initialize(self, n_arms):
 
   def select_arm(self):
+    """Return the index of the arm to pull next.
+
+    When random.random() exceeds self.epsilon, pick uniformly among the
+    indices whose value equals the maximum of self.values; otherwise pick a
+    uniformly random index.
+    """
     if random.random() > self.epsilon:
-      return ind_max(self.values)
+      # Find the maximum once instead of once per arm inside the filter.
+      best = max(self.values)
+      return random.choice([i for i, v in enumerate(self.values) if v == best])
     else:
       return random.randrange(len(self.values))
 

diff --git a/python/algorithms/hedge/hedge.py b/python/algorithms/hedge/hedge.py
@@ -25,8 +25,14 @@ def initialize(self, n_arms):
     return
 
   def select_arm(self):
-    z = sum([math.exp(v / self.temperature) for v in self.values])
-    probs = [math.exp(v / self.temperature) / z for v in self.values]
+    # Softmax over values / temperature. Subtracting the largest scaled value
+    # before exponentiating is mathematically a no-op for the probabilities,
+    # but keeps every exponent <= 0 so math.exp cannot raise OverflowError.
+    scaled = [v / self.temperature for v in self.values]
+    shift = max(scaled, default=0.0)
+    weights = [math.exp(s - shift) for s in scaled]
+    z = sum(weights)
+    probs = [w / z for w in weights]
     return categorical_draw(probs)
 
   def update(self, chosen_arm, reward):

diff --git a/python/algorithms/ucb/ucb1.py b/python/algorithms/ucb/ucb1.py
@@ -1,8 +1,5 @@
 import math
-
-def ind_max(x):
-  m = max(x)
-  return x.index(m)
+import random
 
 class UCB1():
   def __init__(self, counts, values):
@@ -26,7 +23,10 @@ def select_arm(self):
     for arm in range(n_arms):
       bonus = math.sqrt((2 * math.log(total_counts)) / float(self.counts[arm]))
       ucb_values[arm] = self.values[arm] + bonus
-    return ind_max(ucb_values)
+    # Choose uniformly among the arms that share the highest upper bound.
+    # The maximum is computed once instead of once per arm inside the filter.
+    best = max(ucb_values)
+    return random.choice([i for i, v in enumerate(ucb_values) if v == best])
 
   def update(self, chosen_arm, reward):
     self.counts[chosen_arm] = self.counts[chosen_arm] + 1

diff --git a/python/algorithms/ucb/ucb2.py b/python/algorithms/ucb/ucb2.py
@@ -1,8 +1,5 @@
 import math
-
-def ind_max(x):
-  m = max(x)
-  return x.index(m)
+import random
 
 class UCB2(object):
   def __init__(self, alpha, counts, values):
@@ -60,7 +57,7 @@ def select_arm(self):
       bonus = self.__bonus(total_counts, self.r[arm])
       ucb_values[arm] = self.values[arm] + bonus
 
-    chosen_arm = ind_max(ucb_values)
+    chosen_arm = random.choice([i for i, v in enumerate(ucb_values) if v == max(ucb_values)])
     self.__set_arm(chosen_arm)
     return chosen_arm