forked from iank/erasmus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
agent.py
101 lines (75 loc) · 2.61 KB
/
agent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from math import sin, cos, pi, exp
import numpy as np
import ImageDraw
def getDDvf(im, x, y, theta, epsilon):
'''Approximate (w/ epsilon) the directional derivative of f at (x,y)
along a vector defined by theta.'''
x2 = x + epsilon * cos(theta)
y2 = y + epsilon * sin(theta)
x = max(x, 0)
y = max(y, 0)
x2 = max(x2, 0)
y2 = max(y2, 0)
x2 = min(x2, im.shape[1] - 1)
y2 = min(y2, im.shape[0] - 1)
x2 = round(x2)
y2 = round(y2)
p1 = im[y, x]
p2 = im[y2, x2]
return (p2 - p1) / epsilon
def getX(im, x, y, thetas, epsilon):
'''Get gradient for n evenly-spaced radial directions around [x,y]'''
return [getDDvf(im, x, y, theta, epsilon) for theta in thetas]
def h_ax(alpha, x):
'''h_alpha(x) = 2*pi(sigmoid(x)-0.5)'''
return 2 * pi * (1.0 / (1 + exp(np.dot(alpha, x))) - 0.5)
class Agent:
def __init__(self, alpha):
self.alpha = alpha
self.paths = None
self.finished = None
def render_paths(self, im):
path_img = im.copy()
draw = ImageDraw.Draw(path_img)
for path in self.paths:
draw.line(path, fill=(255, 0, 0))
return path_img
def reset_paths(self):
self.paths = []
def score(self, im, x, y, epsilon, lam, n, maxN, Q):
'''Compute the score of this agent on the provided image.'''
thetas = np.linspace(0, 2 * np.pi, n+1)[:-1]
path = [(x, y)]
cost = 0.0
self.finished = False
for i in range(maxN):
# Compute model output
X = getX(im, x, y, thetas, epsilon)
phi_hat = h_ax(self.alpha, X)
# Compute next position based on model output
x_next = round(x + epsilon * cos(phi_hat))
y_next = round(y + epsilon * sin(phi_hat))
# Clamp next position to the image boundary
x_next = np.clip(x_next, 0, im.shape[1] - 1)
y_next = np.clip(y_next, 0, im.shape[0] - 1)
# Update the cost
a = im[y, x]
b = im[y_next, x_next]
delta_n = float(b - a)
if delta_n >= 0:
g_n = delta_n
else:
g_n = - delta_n / 2
cost += (g_n ** 2)
path.append((x_next, y_next))
# Stop if we've reached the goal
if x_next == im.shape[1] - 1:
self.finished = True
break
x, y = x_next, y_next
# Add the penalty if the goal wasn't reached
if not self.finished:
cost += Q
cost += lam * len(path)
self.paths.append(path)
return cost