Skip to content

Commit

Permalink
Fix line lengths.
Browse files Browse the repository at this point in the history
  • Loading branch information
tillahoffmann committed Mar 22, 2024
1 parent a9f2f93 commit 270fcf8
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 87 deletions.
3 changes: 2 additions & 1 deletion convert_cycle_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
# See 2nd paragraph, 2nd column, first page of main text (10.1001/jama.2020.3786).
"Wang2020-thresholds": [30, 24.3],
"Wang2020-loads": np.log10([2.6e4, 1.4e6]),
# See supplementary table 4 (10.3346/jkms.2020.35.e86); conversion is for respiratory specimen.
# See supplementary table 4 (10.3346/jkms.2020.35.e86); conversion is for
# respiratory specimen.
"Kim2020b-thresholds": [25.05, 35.09],
"Kim2020b-loads": np.log10([46971053, 61341]),
}
Expand Down
3 changes: 2 additions & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
[flake8]
max-line-length = 100
max-line-length = 88
exclude = PolyChordLite,.eggs
extend-ignore = E203

[rstcheck]
ignore_directives=jinja
Expand Down
50 changes: 26 additions & 24 deletions shedding/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,41 +54,43 @@ def flatten_datasets(datasets, loq_fill_value=np.nan, day_fill_value=np.nan):
datasets : dict
Mapping of datasets.
loq_fill_value : float
Fill value to replace loads below the level of quantification (LOQ) as log10 gene copies per
mL.
Fill value to replace loads below the level of quantification (LOQ) as log10
gene copies per mL.
day_fill_value : float
Fill value to replace missing information about days since symptoms began as days.
Fill value to replace missing information about days since symptoms began as
days.
Returns
-------
data : dict
Data obtained by flattening datasets, including
* **num_patients** (`int`): Total number of patients across all datasets.
* **num_samples** (`ndarray[int]<num_patients>`): Total number of samples across all
datasets.
* **num_samples_by_patient** (`ndarray[int]<num_patients>`): Number of samples for each
patient.
* **num_positives_by_patient** (`ndarray[int]<num_patients>`): Number of positive samples
* **num_samples** (`ndarray[int]<num_patients>`): Total number of samples
across all datasets.
* **num_samples_by_patient** (`ndarray[int]<num_patients>`): Number of samples
for each patient.
* **num_negatives_by_patient** (`ndarray[int]<num_patients>`): Number of negative samples
for each patient.
* **idx** (`ndarray[int]<num_samples>`): Index of the patient from whom each sample was
collected.
* **load** (`ndarray[float]<num_samples>`): Viral RNA load for each sample as gene copies
per mL or `10 ** loq_fill_value` if the concentration is below the level of
quantification.
* **num_positives_by_patient** (`ndarray[int]<num_patients>`): Number of
positive samples for each patient.
* **num_negatives_by_patient** (`ndarray[int]<num_patients>`): Number of
negative samples for each patient.
* **idx** (`ndarray[int]<num_samples>`): Index of the patient from whom each
sample was collected.
* **load** (`ndarray[float]<num_samples>`): Viral RNA load for each sample as
gene copies per mL or `10 ** loq_fill_value` if the concentration is below the
level of quantification.
* **loadln** (`ndarray[float]<num_samples>`): Natural logarithm of **load**.
* **loq** (`ndarray[float]<num_samples>`): Level of quantification for each sample as gene
copies per mL.
* **loq** (`ndarray[float]<num_samples>`): Level of quantification for each
sample as gene copies per mL.
* **loqln** (`ndarray[float]<num_samples>`): Natural logarithm of **loq**.
* **positive** (`ndarray[bool]<num_samples>`): Indicator for each sample as to whether the
RNA load is above the level of quantification.
* **day** (`ndarray[int]<num_samples>`): Day after symptom onset on which the sample was
collected or `day_fill_value` if unavailable.
* **dataset** (`ndarray[str]<num_samples>`): Dataset from which each sample was obtained.
* **patient** (`ndarray[int]<num_samples>`): Patient from which each sample was obtained as
reported in the corresponding dataset.
* **positive** (`ndarray[bool]<num_samples>`): Indicator for each sample as to
whether the RNA load is above the level of quantification.
* **day** (`ndarray[int]<num_samples>`): Day after symptom onset on which the
sample was collected or `day_fill_value` if unavailable.
* **dataset** (`ndarray[str]<num_samples>`): Dataset from which each sample was
obtained.
* **patient** (`ndarray[int]<num_samples>`): Patient from which each sample was
obtained as reported in the corresponding dataset.
"""
# Lookup for integer identifiers for each patient
patient_lookup = {}
Expand Down
104 changes: 55 additions & 49 deletions shedding/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
from scipy import integrate, special
from .util import flush_traceback, logmeanexp

# Importing _util breaks the readthedocs build. Omit if when we're building the documentation.
# Importing _util breaks the readthedocs build. Omit if when we're building the
# documentation.
import os

if not os.environ.get("READTHEDOCS"): # pragma: no cover
Expand Down Expand Up @@ -132,7 +133,8 @@ def vector_to_values(parameters, vector):

def transpose_samples(samples, parameters=None):
"""
Transpose samples from a list of dictionaries to a dictionary of lists and vice versa.
Transpose samples from a list of dictionaries to a dictionary of lists and vice
versa.
Parameters
----------
Expand Down Expand Up @@ -277,7 +279,8 @@ def gengamma_mean(q, mu, sigma, qmin=1e-6):

def _gengamma_loc(q, sigma, mean, qmin=1e-6):
"""
Evaluate the scale of the generalised gamma distribution for given shape, exponent, and mean.
Evaluate the scale of the generalised gamma distribution for given shape, exponent,
and mean.
"""
a = 1 / q**2
cinv = sigma / q
Expand Down Expand Up @@ -394,8 +397,8 @@ def from_uniform(cls, uniform, lower, upper, base):

def from_abc(a, b, c):
"""
Transform from the parametrisation in terms of shape, scale, and exponent to Stacey's
parametrisation.
Transform from the parametrisation in terms of shape, scale, and exponent to
Stacey's parametrisation.
"""
q = 1 / np.sqrt(a)
mu = np.log(a / b) / c
Expand All @@ -405,8 +408,8 @@ def from_abc(a, b, c):

def to_abc(q, mu, sigma):
"""
Transform from Stacey's parametrisation to the parametrisation in terms of shape, scale, and
exponent.
Transform from Stacey's parametrisation to the parametrisation in terms of shape,
scale, and exponent.
"""
a = 1 / q**2
c = q / sigma
Expand All @@ -429,8 +432,8 @@ class Profile(enum.Enum):

def evaluate_offset(self, day, values):
"""
Evaluate the offset from the population-level location parameter due to the shedding
profile.
Evaluate the offset from the population-level location parameter due to the
shedding profile.
Parameters
----------
Expand Down Expand Up @@ -459,15 +462,15 @@ def evaluate_offset(self, day, values):

class SimulationMode(enum.Enum):
"""
Enum to indicate whether simulation should be for `EXISTING_PATIENTS` or `NEW_PATIENTS`. Given
posterior samples, the former can be used to assess the fit of the model to samples collected
from new, unknown patients, and the latter can be used to assess the fit to new samples
collected from existing patients.
Enum to indicate whether simulation should be for `EXISTING_PATIENTS` or
`NEW_PATIENTS`. Given posterior samples, the former can be used to assess the fit of
the model to samples collected from new, unknown patients, and the latter can be
used to assess the fit to new samples collected from existing patients.
Notes
-----
The `NEW_PATIENTS` replication mode can be used to simulate data given hyperparameters at the
population level.
The `NEW_PATIENTS` replication mode can be used to simulate data given
hyperparameters at the population level.
"""

NEW_PATIENTS = "new_patients"
Expand All @@ -476,9 +479,9 @@ class SimulationMode(enum.Enum):

class Transformation:
r"""
A bijective transformation :math:`y = f(x)` for regular variables :math:`x` and transformed
variables :math:`y`. For sampling purposes, the transformation induces a volume compression or
expansion that needs to be accounted for, i.e.
A bijective transformation :math:`y = f(x)` for regular variables :math:`x` and
transformed variables :math:`y`. For sampling purposes, the transformation induces a
volume compression or expansion that needs to be accounted for, i.e.
.. math::
Expand All @@ -499,8 +502,8 @@ def inverse(self, values):

def jacobianlogdet(self, values):
"""
Evaluate the logarithm of the Jacobian determinant to account for sampling in the
transformed space rather than the regular space.
Evaluate the logarithm of the Jacobian determinant to account for sampling in
the transformed space rather than the regular space.
"""
raise NotImplementedError

Expand Down Expand Up @@ -546,8 +549,8 @@ def jacobianlogdet(self, values):

def _augment_values(func):
"""
Decorator to augment the values by adding the population and patient shapes based on the
parametrisation.
Decorator to augment the values by adding the population and patient shapes based on
the parametrisation.
"""

@ft.wraps(func)
Expand Down Expand Up @@ -593,28 +596,30 @@ class Model:
parametrisation : Parametrisation
Parametrisation used by the model (see notes for details).
inflated : bool
Whether there is a "zero-inflated" subpopulation of patients who do not shed RNA.
Whether there is a "zero-inflated" subpopulation of patients who do not shed
RNA.
temporal : str
Whether to include a shedding profile in the fit.
priors : dict
Mapping of parameter names to callable priors.
Notes
-----
The generalised gamma distribution is a versatile distribution for positive continuous data. We
use the parametrisation considered by Stacey with shape parameter :math:`q`, location parameter
:math:`\mu`, and scale parameter :math:`\sigma`. Then if :math:`\gamma` follows a gamma
distribution with shape parameter :math:`a=1/q^2`, the random variable
The generalised gamma distribution is a versatile distribution for positive
continuous data. We use the parametrisation considered by Stacey with shape
parameter :math:`q`, location parameter :math:`\mu`, and scale parameter
:math:`\sigma`. Then if :math:`\gamma` follows a gamma distribution with shape
parameter :math:`a=1/q^2`, the random variable
.. math ::
x = \exp\left(\mu + \frac{\sigma}{q}\log\left(q^2\gamma\right)\right)
follows a generalised gamma distribution.
The generalised gamma distribution includes the regular gamma distribution (:math:`\sigma=q`),
Weibull distribution (:math:`q=1`), and lognormal distribution (:math:`q=0`) as special cases.
The model can be restricted to a particular distribution using the :code:`parametrisation`
parameter.
The generalised gamma distribution includes the regular gamma distribution
(:math:`\sigma=q`), Weibull distribution (:math:`q=1`), and lognormal distribution
(:math:`q=0`) as special cases. The model can be restricted to a particular
distribution using the :code:`parametrisation` parameter.
"""

def __init__(
Expand Down Expand Up @@ -646,8 +651,8 @@ def __init__(
if self.inflated:
self.parameters["rho"] = ()
if self.temporal == Profile.GAMMA:
# Parameters for the gamma distribution. Shape and scale as usual. Offset is just an
# overall shift.
# Parameters for the gamma distribution. Shape and scale as usual. Offset is
# just an overall shift.
self.parameters.update(
{
"profile_offset": (),
Expand Down Expand Up @@ -734,8 +739,8 @@ def sample_params(self, values):
@_augment_values
def _evaluate_sample_log_likelihood(self, values, data, i: int = None):
"""
Evaluate the log likelihood for each sample conditional on all model parameters, assuming
that all patients shed virus.
Evaluate the log likelihood for each sample conditional on all model parameters,
assuming that all patients shed virus.
Parameters
----------
Expand Down Expand Up @@ -773,8 +778,8 @@ def _evaluate_sample_log_likelihood(self, values, data, i: int = None):
@_augment_values
def _evaluate_patient_log_likelihood(self, values, data):
"""
Evaluate the log likelihood for each patient conditional on all model parameters, assuming
that all patients shed virus.
Evaluate the log likelihood for each patient conditional on all model
parameters, assuming that all patients shed virus.
"""
result = self._evaluate_sample_log_likelihood(values, data)
return np.bincount(data["idx"], result, minlength=data["num_patients"])
Expand Down Expand Up @@ -804,8 +809,8 @@ def evaluate_marginal_log_likelihood(
self, values, data, n=1000, eps=1e-6, **kwargs
):
"""
Evaluate the log likelihood of the observed data marginalised with respect to group-level
parameters but conditional on hyperparameters.
Evaluate the log likelihood of the observed data marginalised with respect to
group-level parameters but conditional on hyperparameters.
Parameters
----------
Expand All @@ -814,8 +819,8 @@ def evaluate_marginal_log_likelihood(
data : dict
Data from which the posterior samples were inferred.
n : int or str
Number of samples to use if simulation is required to evaluate the likelihood or `scipy`
for numerical integration.
Number of samples to use if simulation is required to evaluate the
likelihood or `scipy` for numerical integration.
eps: float
Value to clip the patient mean at to avoid underflows.
Expand All @@ -837,8 +842,8 @@ def evaluate_marginal_log_likelihood(
),
eps,
)
# Evaluate the sample log likelihood and marginalise with respect to the patient-level
# attributes
# Evaluate the sample log likelihood and marginalise with respect to the
# patient-level attributes.
sample_likelihood = self._evaluate_sample_log_likelihood(values, data)
sample_likelihood = logmeanexp(sample_likelihood, axis=0)

Expand Down Expand Up @@ -881,9 +886,10 @@ def _func(x, patient):
if not self.inflated:
return patient_likelihood

# Patients that have all-negative samples may be non-shedders. So the data are either
# generated by having some latent indicator z==0 or z==1 but the samples are too small to be
# above the LOQ. So we need to evaluate the mixture distribution.
# Patients that have all-negative samples may be non-shedders. So the data are
# either generated by having some latent indicator z==0 or z==1 but the samples
# are too small to be above the LOQ. So we need to evaluate the mixture
# distribution.
all_negative = data["num_positives_by_patient"] == 0
patient_likelihood = np.where(
all_negative,
Expand Down Expand Up @@ -948,9 +954,9 @@ def simulate(self, values, data, simulation_mode):
data : dict
Data from which the posterior samples were inferred.
simulation_mode : SimulationMode
Whether to simulate only the lowest level of the hierarchical model (e.g. generate new
data from existing groups) or simulate the entire hierarchy (e.g. generate new data from
new groups).
Whether to simulate only the lowest level of the hierarchical model (e.g.
generate new data from existing groups) or simulate the entire hierarchy
(e.g. generate new data from new groups).
Returns
-------
Expand Down
Loading

0 comments on commit 270fcf8

Please sign in to comment.