forked from ghurault/ssm-eczema-biomarkers
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SSM.stan
146 lines (123 loc) · 5.15 KB
/
SSM.stan
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
functions {
real normal_lb_ub_rng(real mu, real sigma, real lb, real ub) {
// Sample from truncated normal of mean mu, standard deviation sigma, lower bound lb and upper bound ub
real u;
real p1;
real p2;
if (is_nan(mu) || is_inf(mu)) {
reject("normal_lb_ub_rng: mu must be finite; ", "found mu = ", mu);
} else if (is_nan(sigma) || is_inf(sigma) || sigma < 0) {
reject("normal_lb_ub_rng: sigma must be finite and non-negative; ", "found sigma = ", sigma);
} else if (lb >= ub) {
reject("normal_lb_ub_rng: lb must be less than ub; ", "found lb = ", lb, "and ub = ", ub);
} else {
p1 = normal_cdf(lb, mu, sigma);
p2 = normal_cdf(ub, mu, sigma);
if (p1 >= p2) {
reject("normal_lb_ub_rng: p1 >= p2. p1 = ", p1, " and p2 = ", p2, ". mu = ", mu, "and sigma = ", sigma);
} else {
u = uniform_rng(p1, p2);
}
}
return (sigma * inv_Phi(u)) + mu; // inverse cdf
}
real soft_uniform_lpdf(real x, real lb, real ub) {
return(log(inv_logit(x - lb) - inv_logit(x - ub)) - log(ub - lb));
}
}
data {
int<lower = 0> N_obs; // Number of non-missing observations
int<lower = 0> N_mis; // Number of missing observations
int<lower = 0> N_pt; // Number of patients
real<lower = 0> max_score; // Maximum value that the score can take
int<lower = 1, upper = N_obs + N_mis> idx_obs[N_obs]; // Index of non-missing observations
int<lower = 1, upper = N_obs + N_mis> idx_mis[N_mis]; // Index of missing observations
real<lower = 0, upper = max_score> S_obs[N_obs]; // Observed score
int<lower = 0, upper = N_mis> N_test; // Number of predictions to evaluate (predictions are set to missing in the data)
int<lower = 1, upper = N_obs + N_mis> idx_test[N_test]; // Index of test observations
real<lower = 0, upper = max_score> S_test[N_test]; // Observed score for predictions
int<lower = 0, upper = 1> run; // Switch to evaluate the likelihood
int<lower = 0, upper = 1> rep; // Switch to generate replications
}
transformed data {
int N = N_obs + N_mis; // Total number of observations
int mult = N / N_pt; // Number of timepoints per patient (same by construction)
int start[N_pt]; // index of first observation for patient each patient
int end[N_pt]; // index of last observation for patient each patient
for (k in 1:N_pt) {
start[k] = (k - 1) * mult + 1;
end[k] = k * mult;
}
}
parameters {
real S_lat_eta[N]; // Error term for S_lat, cf. non-centered parametrisation
// Autocorrelation parameter
real<lower = 0, upper = 1> alpha[N_pt]; // Autocorrelation parameter
real<lower = 0, upper = 1> mu_alpha; // Population mean of alpha
real<lower = 0> phi_alpha; // Population pseudo "sample size" of alpha
// Population autoregression mean (for S_lat)
real mu_inf; // Population mean
real<lower = 0> sigma_inf; // Population std
real eta_inf[N_pt]; // Error term
real<lower = 0> sigma_tot; // total noise std
real<lower = 0, upper = 1> rho2; // proportion of the stochastic noise variance in the total noise variance
}
transformed parameters {
real S_lat[N]; // Latent S
real S_inf[N_pt];
real b[N_pt];
real sigma_meas = sqrt(rho2) * sigma_tot; // measurement noise std
real sigma_lat = sqrt(1 - rho2) * sigma_tot; // stochastic noise std
real MDC = 1.96 * sigma_meas; // minimum detectable change (95% level)
for (k in 1:N_pt) {
S_inf[k] = mu_inf + sigma_inf * eta_inf[k];
b[k] = S_inf[k] * (1 - alpha[k]);
}
// Non-centered parametrisation
for (k in 1:N_pt) {
S_lat[start[k]] = max_score * (0.5 + 0.25 * S_lat_eta[start[k]]); // prior covering the full range of the score
for (t in (start[k] + 1):end[k]) {
S_lat[t] = alpha[k] * S_lat[t - 1] + b[k] + sigma_lat * S_lat_eta[t];
}
}
}
model {
S_lat_eta ~ std_normal();
eta_inf ~ std_normal();
mu_inf / max_score ~ normal(0.5, 0.25);
sigma_inf / max_score ~ normal(0, 0.125);
sigma_tot / max_score ~ lognormal(-log(20), 0.5 * log(5)); // 95% CI is [.01, 0.25] * max_score
rho2 ~ beta(4, 2); // process noise expected to be small compared to the measurement noise
mu_alpha ~ beta(2, 2);
phi_alpha ~ lognormal(1 * log(10), 0.5 * log(10)); // mass between 1 and 100
alpha ~ beta(mu_alpha * phi_alpha, (1 - mu_alpha) * phi_alpha);
for (i in 1:N) {
// S_lat[i] ~ soft_uniform(-1, max_score + 1);
S_lat[i] ~ soft_uniform(-.01 * max_score, 1.01 * max_score);
}
if (run == 1) {
for (i in 1:N_obs) {
S_obs[i] ~ normal(S_lat[idx_obs[i]], sigma_meas) T[0, max_score];
}
}
}
generated quantities {
real S_rep[N]; // Replications of S_meas[t + 1] given S_lat[t]
real S_pred[N_test]; // Predictive sample of S_test
real lpd[N_test]; // Log predictive density
// Replications
if (rep == 1) {
for (i in 1:N) {
S_rep[i] = normal_lb_ub_rng(S_lat[i], sigma_meas, 0, max_score);
}
S_pred = S_rep[idx_test];
}
// Log predictive density
{
real Z; // Normalisation constant
for (i in 1:N_test) {
Z = normal_cdf(max_score, S_lat[idx_test[i]], sigma_meas) - normal_cdf(0, S_lat[idx_test[i]], sigma_meas);
lpd[i] = normal_lpdf(S_test[i] | S_lat[idx_test[i]], sigma_meas) - log(Z);
}
}
}