Skip to content

Commit

Permalink
Merge pull request #25 from AngusMcLure/period-sampling
Browse files Browse the repository at this point in the history
Period sampling
  • Loading branch information
AngusMcLure authored Dec 4, 2023
2 parents 2f72f5c + 5c75bd2 commit 2e4878e
Show file tree
Hide file tree
Showing 16 changed files with 789 additions and 243 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,6 @@ RoxygenNote: 7.2.3
Imports:
glue,
hypergeo,
purrr
purrr,
distributions3,
dplyr
16 changes: 8 additions & 8 deletions InProgress/optimise_prevalence_period_sampling.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

fi_pool_imperfect_cluster_unequal <- function(catch.dist, pool.strat, prevalence, sensitivity, specificity,
correlation,form = 'beta', real.scale = FALSE, max.iter = 200){

catch <- max(catch.dist$min-1, 0)
terminate <- FALSE
FI <- matrix(0, 2,2)
Expand Down Expand Up @@ -66,7 +66,7 @@ fi_pool_imperfect_cluster_unequal <- function(catch.dist, pool.strat, prevalence
plot(catches, FI.incr[1,1,])
plot(catches, FI.incr[1,2,])
plot(catches, FI.incr[2,2,])

FI
}

Expand All @@ -85,10 +85,10 @@ optimise_N_prevalence <- function(prevalence, cost.unit, cost.pool,
N.opt <- max(if(correlation == 0){1}else{2}, ceiling(n/max.s)) - 1
cost.opt <- Inf
while(N.opt<n){
cost.new <- unit_fi_cost_clustered(n/(N.opt+1),N.opt+1,prevalence,correlation,
sensitivity,specificity,
cost.unit + cost.collect/catch.collect,
cost.pool,cost.location,form)
cost.new <- cost_fi_cluster(n/(N.opt+1),N.opt+1,prevalence,correlation,
sensitivity,specificity,
cost.unit + cost.collect/catch.collect,
cost.pool,cost.location,form)
if(cost.new > cost.opt){
break
}
Expand All @@ -108,7 +108,7 @@ optimise_NP_prevalence <- function(prevalence, cost.unit, cost.pool,
max.s = 50, max.P = 20){
#print(c(theta = prevalence, sens = sensitivity, spec = specificity, unit = cost.unit, test = cost.pool, location = cost.location , rho = correlation, N = N, form = form, max.s = max.s))
theta <- prevalence

if(correlation == 0){
stop('If there is no correlation between units at locations, then this means sampling at a single random location is a identical to sampling from the whole population. This is assumption unlikely to be true in most settings, but would mean that sampling at a single site is the most cost-effective strategy')
}else{
Expand All @@ -120,7 +120,7 @@ optimise_NP_prevalence <- function(prevalence, cost.unit, cost.pool,
correlation, P.opt+1, form = 'beta',
sensitivity, specificity,
max.s, max.N)

if(opt.new$cost > opt$cost){
break
}else{
Expand Down
109 changes: 0 additions & 109 deletions InProgress/util_unequal_catches.R

This file was deleted.

8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
# Generated by roxygen2: do not edit by hand

export(design_effect)
export(design_effect_random)
export(fi_pool)
export(fi_pool_cluster)
export(fi_pool_cluster_random)
export(fi_pool_random)
export(nb_catch)
export(optimise_random_prevalence)
export(optimise_sN_prevalence)
export(optimise_s_prevalence)
export(pois_catch)
export(pool_max_size)
export(pool_target_number)
Original file line number Diff line number Diff line change
Expand Up @@ -524,11 +524,11 @@ $$
where
$$
\begin{align*}
\frac{\part\theta}{\part \mu} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma} p dp\\
\frac{\part\theta}{\part \mu} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma^2} p dp\\
\frac{\part\theta}{\part \sigma} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{(h(p) - \mu)^2 - \sigma^2}{\sigma^3} p dp\\
\frac{\part\rho}{\part \mu} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma} p^2 dp - \frac{\part\theta}{\part \mu}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}\\
\frac{\part\rho}{\part \mu} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma^2} p^2 dp - \frac{\part\theta}{\part \mu}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}\\
\frac{\part\rho}{\part \sigma} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{(h(p) - \mu)^2 - \sigma^2}{\sigma^3} p^2 dp - \frac{\part\theta}{\part \sigma}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}.
\end{align*}
Expand Down
51 changes: 51 additions & 0 deletions R/catch_distributions.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#' Tools for defining catch-size distributions
#'
#' `nb_catch()` and `pois_catch()` are convenience functions for defining the
#' distribution of catche sizes (aka cluster sizes) for the negative binomial
#' and Poisson distributions respectively
#'
#' @param mean numeric The mean number of units per cluster (catch)
#' @param variance numeric The variance of the number of units per cluster
#' (catch)
#'
#' @return An object of class `distr` summarising the distribution of
#' cluster/catch sizes
#' @rdname catch_distributions
#' @export
#'
#' @examples
#' nb_catch(10,20)
#'
#' pois_catch(10)

nb_catch <- function(mean,variance){
if(mean>variance){stop('variance must be greater than or equal to mean')}
distributions3::NegativeBinomial(size = mean^2/(variance - mean), p = mean/variance)
}

#' @rdname catch_distributions
#' @export
pois_catch <- function(mean){
distributions3::Poisson(lambda = mean)
}

## Functions for implementing common pooling strategies



# ## No longer actually used: but this help you multiply out the product over k of
# ## (1-(1-p)^s_k)^y_k as polynomial in (1-p) which lets you write out likelihood
# ## in terms of sums of beta functions. The output is the coefficients of the
# ## polynomial starting at c_0
# det_poly <- function(s,y){
# if(length(s) != length(y) || !all((c(s,y) %% 1) == 0)){
# stop('s and y must be integer vectors of common length')
# }
#
# K <- length(s)
# out <- 1
# for(k in 1:K){
# out <- out * polynom::polynomial(c(1,rep(0,s[k]-1),-1))^y[k]
# }
# coef(out)
# }
38 changes: 33 additions & 5 deletions R/design_effect.R
Original file line number Diff line number Diff line change
@@ -1,20 +1,30 @@
#' Calculate the design effect for pooled testing.
#'
#' This function calculates the design effect (D) for survey designs using pool
#' These functions calculate the design effect (D) for survey designs using pool
#' testing compared to a simple random survey with individual tests of the same
#' number of units. This allows the comparison of the Fisher Information per
#' unit sampled across different pooling and sampling strategies. A design
#' effect `D>1` (`D<1`) indicates that the pooling/sampling strategy reduces
#' (increases) the Fisher information per unit; the total sample size will have
#' to be multiplied by a factor of D to achieve the same degree of precision in
#' estimating prevalence as a simple random survey with individual tests.
#' Supports both cluster and simple random sampling with perfect or imperfect
#' tests.
#' estimating prevalence as a simple random survey with individual tests. The
#' functions support cluster and simple random sampling with perfect or
#' imperfect tests, and either fixed sample sizes (`design_effect()`) or random
#' sample sizes (`design_effect_random()`).
#'
#' @param pool_size numeric The number of units per pool. Must be a numeric
#' value greater than or equal to 0.
#' @param pool_number numeric The number of pools per cluster. Must be a numeric
#' value greater than or equal to 0.
#' @param catch_dist An object of class `distribution` (e.g. produced by
#' `nb_catch()`) defining the distribution of the possible catch. If
#' `correlation = 0` the catch is for the whole survey. For `correlation > 0`
#' the catch is per cluster (i.e. cluster size).
#' @param pool_strat function Defines a rule for how a number of units will be
#' divided into pools. Must take a single numeric argument and return a named
#' list of pool sizes and pool numbers. `pool_max_size()` and
#' `pool_target_number` provide convenience functions for defining common
#' pooling strategies.
#' @param prevalence numeric The proportion of units that carry the marker of
#' interest (i.e. true positive). Must be be a numeric value between 0 and 1,
#' inclusive of both.
Expand Down Expand Up @@ -51,7 +61,7 @@ design_effect <- function(pool_size,
correlation,
sensitivity,
specificity,
form = "beta") {
form = "beta"){

check_input("pool_size", pool_size)
check_input("pool_number", pool_number)
Expand All @@ -67,3 +77,21 @@ design_effect <- function(pool_size,
correlation, sensitivity, specificity, form)
)[1, 1]
}


#' @rdname design_effect
#' @export
design_effect_random <- function(catch_dist,
pool_strat,
prevalence,
correlation,
sensitivity,
specificity,
form = "beta") {

mean(catch_dist) * fi_pool(pool_size = 1, prevalence, sensitivity, specificity) *
solve(fi_pool_cluster_random(
catch_dist, pool_strat, prevalence,
correlation, sensitivity, specificity, form)
)[1, 1]
}
Loading

0 comments on commit 2e4878e

Please sign in to comment.