Merge pull request #25 from AngusMcLure/period-sampling

Period sampling
AngusMcLure · Dec 4, 2023 · 2e4878e · 2e4878e
2 parents 2f72f5c + 5c75bd2
commit 2e4878e
Show file tree

Hide file tree

Showing 16 changed files with 789 additions and 243 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -20,4 +20,6 @@ RoxygenNote: 7.2.3
 Imports: 
     glue,
     hypergeo,
-    purrr
+    purrr,
+    distributions3,
+    dplyr
diff --git a/InProgress/optimise_prevalence_period_sampling.R b/InProgress/optimise_prevalence_period_sampling.R
@@ -29,7 +29,7 @@
 
 fi_pool_imperfect_cluster_unequal <- function(catch.dist, pool.strat, prevalence, sensitivity, specificity,
                                               correlation,form = 'beta', real.scale = FALSE, max.iter = 200){
-
+  
   catch <- max(catch.dist$min-1, 0)
   terminate <- FALSE
   FI <- matrix(0, 2,2)
@@ -66,7 +66,7 @@ fi_pool_imperfect_cluster_unequal <- function(catch.dist, pool.strat, prevalence
   plot(catches, FI.incr[1,1,])
   plot(catches, FI.incr[1,2,])
   plot(catches, FI.incr[2,2,])
-
+  
   FI
 }
 
@@ -85,10 +85,10 @@ optimise_N_prevalence <- function(prevalence, cost.unit, cost.pool,
   N.opt <- max(if(correlation == 0){1}else{2}, ceiling(n/max.s)) - 1
   cost.opt <- Inf
   while(N.opt<n){
-    cost.new <- unit_fi_cost_clustered(n/(N.opt+1),N.opt+1,prevalence,correlation,
-                                       sensitivity,specificity,
-                                       cost.unit + cost.collect/catch.collect,
-                                       cost.pool,cost.location,form)
+    cost.new <- cost_fi_cluster(n/(N.opt+1),N.opt+1,prevalence,correlation,
+                                sensitivity,specificity,
+                                cost.unit + cost.collect/catch.collect,
+                                cost.pool,cost.location,form)
     if(cost.new > cost.opt){
       break
     }
@@ -108,7 +108,7 @@ optimise_NP_prevalence <- function(prevalence, cost.unit, cost.pool,
                                    max.s = 50, max.P = 20){
   #print(c(theta = prevalence, sens = sensitivity, spec = specificity, unit = cost.unit, test = cost.pool, location = cost.location , rho = correlation, N = N, form = form, max.s = max.s))
   theta <- prevalence
-
+  
   if(correlation == 0){
     stop('If there is no correlation between units at locations, then this means sampling at a single random location is a identical to sampling from the whole population. This is assumption unlikely to be true in most settings, but would mean that sampling at a single site is the most cost-effective strategy')
   }else{
@@ -120,7 +120,7 @@ optimise_NP_prevalence <- function(prevalence, cost.unit, cost.pool,
                                        correlation, P.opt+1, form = 'beta',
                                        sensitivity, specificity,
                                        max.s, max.N)
-
+      
       if(opt.new$cost > opt$cost){
         break
       }else{

diff --git a/InProgress/util_unequal_catches.R b/InProgress/util_unequal_catches.R
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,7 +1,15 @@
 # Generated by roxygen2: do not edit by hand
 
 export(design_effect)
+export(design_effect_random)
 export(fi_pool)
 export(fi_pool_cluster)
+export(fi_pool_cluster_random)
+export(fi_pool_random)
+export(nb_catch)
+export(optimise_random_prevalence)
 export(optimise_sN_prevalence)
 export(optimise_s_prevalence)
+export(pois_catch)
+export(pool_max_size)
+export(pool_target_number)
diff --git a/Paper/Designing pool-tested surveys for detection estimation of prevalence.md b/Paper/Designing pool-tested surveys for detection estimation of prevalence.md
@@ -524,11 +524,11 @@ $$
 where
 $$
 \begin{align*}
-\frac{\part\theta}{\part \mu} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma} p dp\\
+\frac{\part\theta}{\part \mu} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma^2} p dp\\
 
 \frac{\part\theta}{\part \sigma} &= \int_0^1 Normal(h(p)|\mu,\sigma) \frac{(h(p) - \mu)^2 - \sigma^2}{\sigma^3} p dp\\
 
-\frac{\part\rho}{\part \mu} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma} p^2 dp - \frac{\part\theta}{\part \mu}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}\\
+\frac{\part\rho}{\part \mu} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{h(p) - \mu}{\sigma^2} p^2 dp - \frac{\part\theta}{\part \mu}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}\\
 
 \frac{\part\rho}{\part \sigma} &= \left[\int_0^1 Normal(h(p)|\mu,\sigma) \frac{(h(p) - \mu)^2 - \sigma^2}{\sigma^3}  p^2 dp - \frac{\part\theta}{\part \sigma}(2\theta + \rho(1-2\theta))\right]\left[\theta(1-\theta)\right]^{-1}.
 \end{align*}

diff --git a/R/catch_distributions.R b/R/catch_distributions.R
@@ -0,0 +1,51 @@
+#' Tools for defining catch-size distributions
+#'
+#' `nb_catch()` and `pois_catch()` are convenience functions for defining the
+#' distribution of catche sizes (aka cluster sizes) for the negative binomial
+#' and Poisson distributions respectively
+#'
+#' @param mean numeric The mean number of units per cluster (catch)
+#' @param variance numeric The variance of the number of units per cluster
+#'   (catch)
+#'
+#' @return An object of class `distr` summarising the distribution of
+#'   cluster/catch sizes
+#' @rdname catch_distributions
+#' @export
+#'
+#' @examples
+#' nb_catch(10,20)
+#'
+#' pois_catch(10)
+
+nb_catch <- function(mean,variance){
+  if(mean>variance){stop('variance must be greater than or equal to mean')}
+  distributions3::NegativeBinomial(size = mean^2/(variance - mean), p = mean/variance)
+}
+
+#' @rdname catch_distributions
+#' @export
+pois_catch <- function(mean){
+  distributions3::Poisson(lambda = mean)
+}
+
+## Functions for implementing common pooling strategies
+
+
+
+# ## No longer actually used: but this help you multiply out the product over k of
+# ## (1-(1-p)^s_k)^y_k as polynomial in (1-p) which lets you write out likelihood
+# ## in terms of sums of beta functions.  The output is the coefficients of the
+# ## polynomial starting at c_0
+# det_poly <- function(s,y){
+#   if(length(s) != length(y) || !all((c(s,y) %% 1) == 0)){
+#     stop('s and y must be integer vectors of common length')
+#   }
+# 
+#   K <- length(s)
+#   out <- 1
+#   for(k in 1:K){
+#     out <- out * polynom::polynomial(c(1,rep(0,s[k]-1),-1))^y[k]
+#   }
+#   coef(out)
+# }
diff --git a/R/design_effect.R b/R/design_effect.R
@@ -1,20 +1,30 @@
 #' Calculate the design effect for pooled testing.
 #'
-#' This function calculates the design effect (D) for survey designs using pool
+#' These functions calculate the design effect (D) for survey designs using pool
 #' testing compared to a simple random survey with individual tests of the same
 #' number of units. This allows the comparison of the Fisher Information per
 #' unit sampled across different pooling and sampling strategies. A design
 #' effect `D>1` (`D<1`) indicates that the pooling/sampling strategy reduces
 #' (increases) the Fisher information per unit; the total sample size will have
 #' to be multiplied by a factor of D to achieve the same degree of precision in
-#' estimating prevalence as a simple random survey with individual tests.
-#' Supports both cluster and simple random sampling with perfect or imperfect
-#' tests.
+#' estimating prevalence as a simple random survey with individual tests. The
+#' functions support cluster and simple random sampling with perfect or
+#' imperfect tests, and either fixed sample sizes (`design_effect()`) or random
+#' sample sizes (`design_effect_random()`).
 #'
 #' @param pool_size numeric The number of units per pool. Must be a numeric
 #'   value greater than or equal to 0.
 #' @param pool_number numeric The number of pools per cluster. Must be a numeric
 #'   value greater than or equal to 0.
+#' @param catch_dist An object of class `distribution` (e.g. produced by
+#'   `nb_catch()`) defining the distribution of the possible catch. If
+#'   `correlation = 0` the catch is for the whole survey. For `correlation > 0`
+#'   the catch is per cluster (i.e. cluster size).
+#' @param pool_strat function Defines a rule for how a number of units will be
+#'   divided into pools. Must take a single numeric argument and return a named
+#'   list of pool sizes and pool numbers. `pool_max_size()` and
+#'   `pool_target_number` provide convenience functions for defining common
+#'   pooling strategies.
 #' @param prevalence numeric The proportion of units that carry the marker of
 #'   interest (i.e. true positive). Must be be a numeric value between 0 and 1,
 #'   inclusive of both.
@@ -51,7 +61,7 @@ design_effect <- function(pool_size,
                           correlation,
                           sensitivity,
                           specificity,
-                          form = "beta") {
+                          form = "beta"){
 
   check_input("pool_size", pool_size)
   check_input("pool_number", pool_number)
@@ -67,3 +77,21 @@ design_effect <- function(pool_size,
       correlation, sensitivity, specificity, form)
     )[1, 1]
 }
+
+
+#' @rdname design_effect
+#' @export
+design_effect_random <- function(catch_dist,
+                                 pool_strat,
+                                 prevalence,
+                                 correlation,
+                                 sensitivity,
+                                 specificity,
+                                 form = "beta") {
+
+  mean(catch_dist) * fi_pool(pool_size = 1, prevalence, sensitivity, specificity) *
+    solve(fi_pool_cluster_random(
+      catch_dist, pool_strat, prevalence,
+      correlation, sensitivity, specificity, form)
+    )[1, 1]
+}