-
Notifications
You must be signed in to change notification settings - Fork 5
/
optimize_PAC_bound.py
74 lines (47 loc) · 1.9 KB
/
optimize_PAC_bound.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import cvxpy as cvx
import numpy as np
import time
# Optimize PAC bound using Relative Entropy Programming
def optimize_PAC_bound(costs_precomputed, p0, delta):
# Number of actions
L = len(p0)
# Number of environments
m = np.shape(costs_precomputed)[0]
# Discretize lambdas
lambdas = np.linspace(0,1,100)
# Initialize vectors for storing optimal solutions
taus = np.zeros(len(lambdas))
ps = len(lambdas)*[None]
for k in range(len(lambdas)):
lambda0 = lambdas[k]
# Create cost function variable
tau = cvx.Variable()
# Create variable for probability vector
p = cvx.Variable(L)
cost_empirical = (1/m)*cvx.sum(costs_precomputed*p)
# Constraints
constraints = [lambda0**2 >= (cvx.sum(cvx.kl_div(p, p0)) + np.log(2*np.sqrt(m)/delta))/(2*m), lambda0 == (tau - cost_empirical), p >= 0, cvx.sum(p) == 1]
prob = cvx.Problem(cvx.Minimize(tau), constraints)
# Solve problem
opt = prob.solve(verbose=False, solver=cvx.SCS) # , max_iters=3000)
# Store optimal value and optimizer
if (opt > 1.0):
taus[k] = 1.0
ps[k] = p.value
else:
taus[k] = opt
ps[k] = p.value
# Find minimizer
min_ind = np.argmin(taus)
tau_opt = taus[min_ind]
p_opt = ps[min_ind]
return tau_opt, p_opt, taus
# Compute kl inverse using Relative Entropy Programming
def kl_inverse(q, c):
p_bernoulli = cvx.Variable(2)
q_bernoulli = np.array([q,1-q])
constraints = [c >= cvx.sum(cvx.kl_div(q_bernoulli,p_bernoulli)), 0 <= p_bernoulli[0], p_bernoulli[0] <= 1, p_bernoulli[1] == 1.0-p_bernoulli[0]]
prob = cvx.Problem(cvx.Maximize(p_bernoulli[0]), constraints)
# Solve problem
opt = prob.solve(verbose=False, solver=cvx.SCS) # solver=cvx.ECOS
return p_bernoulli.value[0]