Fix line lengths.

tillahoffmann · Mar 22, 2024 · 270fcf8 · 270fcf8
1 parent a9f2f93
commit 270fcf8
Show file tree

Hide file tree

Showing 6 changed files with 103 additions and 87 deletions.
diff --git a/convert_cycle_threshold.py b/convert_cycle_threshold.py
@@ -8,7 +8,8 @@
     # See 2nd paragraph, 2nd column, first page of main text (10.1001/jama.2020.3786).
     "Wang2020-thresholds": [30, 24.3],
     "Wang2020-loads": np.log10([2.6e4, 1.4e6]),
-    # See supplementary table 4 (10.3346/jkms.2020.35.e86); conversion is for respiratory specimen.
+    # See supplementary table 4 (10.3346/jkms.2020.35.e86); conversion is for
+    # respiratory specimen.
     "Kim2020b-thresholds": [25.05, 35.09],
     "Kim2020b-loads": np.log10([46971053, 61341]),
 }

diff --git a/setup.cfg b/setup.cfg
@@ -1,6 +1,7 @@
 [flake8]
-max-line-length = 100
+max-line-length = 88
 exclude = PolyChordLite,.eggs
+extend-ignore = E203
 
 [rstcheck]
 ignore_directives=jinja

diff --git a/shedding/data.py b/shedding/data.py
@@ -54,41 +54,43 @@ def flatten_datasets(datasets, loq_fill_value=np.nan, day_fill_value=np.nan):
     datasets : dict
         Mapping of datasets.
     loq_fill_value : float
-        Fill value to replace loads below the level of quantification (LOQ) as log10 gene copies per
-        mL.
+        Fill value to replace loads below the level of quantification (LOQ) as log10
+        gene copies per mL.
     day_fill_value : float
-        Fill value to replace missing information about days since symptoms began as days.
+        Fill value to replace missing information about days since symptoms began as
+        days.
 
     Returns
     -------
     data : dict
         Data obtained by flattening datasets, including
 
         * **num_patients** (`int`): Total number of patients across all datasets.
-        * **num_samples** (`ndarray[int]<num_patients>`): Total number of samples across all
-          datasets.
-        * **num_samples_by_patient** (`ndarray[int]<num_patients>`): Number of samples for each
-          patient.
-        * **num_positives_by_patient** (`ndarray[int]<num_patients>`): Number of positive samples
+        * **num_samples** (`ndarray[int]<num_patients>`): Total number of samples
+          across all datasets.
+        * **num_samples_by_patient** (`ndarray[int]<num_patients>`): Number of samples
           for each patient.
-        * **num_negatives_by_patient** (`ndarray[int]<num_patients>`): Number of negative samples
-          for each patient.
-        * **idx** (`ndarray[int]<num_samples>`): Index of the patient from whom each sample was
-          collected.
-        * **load** (`ndarray[float]<num_samples>`): Viral RNA load for each sample as gene copies
-          per mL or `10 ** loq_fill_value` if the concentration is below the level of
-          quantification.
+        * **num_positives_by_patient** (`ndarray[int]<num_patients>`): Number of
+          positive samples for each patient.
+        * **num_negatives_by_patient** (`ndarray[int]<num_patients>`): Number of
+          negative samples for each patient.
+        * **idx** (`ndarray[int]<num_samples>`): Index of the patient from whom each
+          sample was collected.
+        * **load** (`ndarray[float]<num_samples>`): Viral RNA load for each sample as
+          gene copies per mL or `10 ** loq_fill_value` if the concentration is below the
+          level of quantification.
         * **loadln** (`ndarray[float]<num_samples>`): Natural logarithm of **load**.
-        * **loq** (`ndarray[float]<num_samples>`): Level of quantification for each sample as gene
-          copies per mL.
+        * **loq** (`ndarray[float]<num_samples>`): Level of quantification for each
+          sample as gene copies per mL.
         * **loqln** (`ndarray[float]<num_samples>`): Natural logarithm of **loq**.
-        * **positive** (`ndarray[bool]<num_samples>`): Indicator for each sample as to whether the
-          RNA load is above the level of quantification.
-        * **day** (`ndarray[int]<num_samples>`): Day after symptom onset on which the sample was
-          collected or `day_fill_value` if unavailable.
-        * **dataset** (`ndarray[str]<num_samples>`): Dataset from which each sample was obtained.
-        * **patient** (`ndarray[int]<num_samples>`): Patient from which each sample was obtained as
-          reported in the corresponding dataset.
+        * **positive** (`ndarray[bool]<num_samples>`): Indicator for each sample as to
+          whether the RNA load is above the level of quantification.
+        * **day** (`ndarray[int]<num_samples>`): Day after symptom onset on which the
+          sample was collected or `day_fill_value` if unavailable.
+        * **dataset** (`ndarray[str]<num_samples>`): Dataset from which each sample was
+          obtained.
+        * **patient** (`ndarray[int]<num_samples>`): Patient from which each sample was
+          obtained as reported in the corresponding dataset.
     """
     # Lookup for integer identifiers for each patient
     patient_lookup = {}

diff --git a/shedding/model.py b/shedding/model.py
@@ -6,7 +6,8 @@
 from scipy import integrate, special
 from .util import flush_traceback, logmeanexp
 
-# Importing _util breaks the readthedocs build. Omit if when we're building the documentation.
+# Importing _util breaks the readthedocs build. Omit if when we're building the
+# documentation.
 import os
 
 if not os.environ.get("READTHEDOCS"):  # pragma: no cover
@@ -132,7 +133,8 @@ def vector_to_values(parameters, vector):
 
 def transpose_samples(samples, parameters=None):
     """
-    Transpose samples from a list of dictionaries to a dictionary of lists and vice versa.
+    Transpose samples from a list of dictionaries to a dictionary of lists and vice
+    versa.
 
     Parameters
     ----------
@@ -277,7 +279,8 @@ def gengamma_mean(q, mu, sigma, qmin=1e-6):
 
 def _gengamma_loc(q, sigma, mean, qmin=1e-6):
     """
-    Evaluate the scale of the generalised gamma distribution for given shape, exponent, and mean.
+    Evaluate the scale of the generalised gamma distribution for given shape, exponent,
+    and mean.
     """
     a = 1 / q**2
     cinv = sigma / q
@@ -394,8 +397,8 @@ def from_uniform(cls, uniform, lower, upper, base):
 
 def from_abc(a, b, c):
     """
-    Transform from the parametrisation in terms of shape, scale, and exponent to Stacey's
-    parametrisation.
+    Transform from the parametrisation in terms of shape, scale, and exponent to
+    Stacey's parametrisation.
     """
     q = 1 / np.sqrt(a)
     mu = np.log(a / b) / c
@@ -405,8 +408,8 @@ def from_abc(a, b, c):
 
 def to_abc(q, mu, sigma):
     """
-    Transform from Stacey's parametrisation to the parametrisation in terms of shape, scale, and
-    exponent.
+    Transform from Stacey's parametrisation to the parametrisation in terms of shape,
+    scale, and exponent.
     """
     a = 1 / q**2
     c = q / sigma
@@ -429,8 +432,8 @@ class Profile(enum.Enum):
 
     def evaluate_offset(self, day, values):
         """
-        Evaluate the offset from the population-level location parameter due to the shedding
-        profile.
+        Evaluate the offset from the population-level location parameter due to the
+        shedding profile.
 
         Parameters
         ----------
@@ -459,15 +462,15 @@ def evaluate_offset(self, day, values):
 
 class SimulationMode(enum.Enum):
     """
-    Enum to indicate whether simulation should be for `EXISTING_PATIENTS` or `NEW_PATIENTS`. Given
-    posterior samples, the former can be used to assess the fit of the model to samples collected
-    from new, unknown patients, and the latter can be used to assess the fit to new samples
-    collected from existing patients.
+    Enum to indicate whether simulation should be for `EXISTING_PATIENTS` or
+    `NEW_PATIENTS`. Given posterior samples, the former can be used to assess the fit of
+    the model to samples collected from new, unknown patients, and the latter can be
+    used to assess the fit to new samples collected from existing patients.
 
     Notes
     -----
-    The `NEW_PATIENTS` replication mode can be used to simulate data given hyperparameters at the
-    population level.
+    The `NEW_PATIENTS` replication mode can be used to simulate data given
+    hyperparameters at the population level.
     """
 
     NEW_PATIENTS = "new_patients"
@@ -476,9 +479,9 @@ class SimulationMode(enum.Enum):
 
 class Transformation:
     r"""
-    A bijective transformation :math:`y = f(x)` for regular variables :math:`x` and transformed
-    variables :math:`y`. For sampling purposes, the transformation induces a volume compression or
-    expansion that needs to be accounted for, i.e.
+    A bijective transformation :math:`y = f(x)` for regular variables :math:`x` and
+    transformed variables :math:`y`. For sampling purposes, the transformation induces a
+    volume compression or expansion that needs to be accounted for, i.e.
 
     .. math::
 
@@ -499,8 +502,8 @@ def inverse(self, values):
 
     def jacobianlogdet(self, values):
         """
-        Evaluate the logarithm of the Jacobian determinant to account for sampling in the
-        transformed space rather than the regular space.
+        Evaluate the logarithm of the Jacobian determinant to account for sampling in
+        the transformed space rather than the regular space.
         """
         raise NotImplementedError
 
@@ -546,8 +549,8 @@ def jacobianlogdet(self, values):
 
 def _augment_values(func):
     """
-    Decorator to augment the values by adding the population and patient shapes based on the
-    parametrisation.
+    Decorator to augment the values by adding the population and patient shapes based on
+    the parametrisation.
     """
 
     @ft.wraps(func)
@@ -593,28 +596,30 @@ class Model:
     parametrisation : Parametrisation
         Parametrisation used by the model (see notes for details).
     inflated : bool
-        Whether there is a "zero-inflated" subpopulation of patients who do not shed RNA.
+        Whether there is a "zero-inflated" subpopulation of patients who do not shed
+        RNA.
     temporal : str
         Whether to include a shedding profile in the fit.
     priors : dict
         Mapping of parameter names to callable priors.
 
     Notes
     -----
-    The generalised gamma distribution is a versatile distribution for positive continuous data. We
-    use the parametrisation considered by Stacey with shape parameter :math:`q`, location parameter
-    :math:`\mu`, and scale parameter :math:`\sigma`. Then if :math:`\gamma` follows a gamma
-    distribution with shape parameter :math:`a=1/q^2`, the random variable
+    The generalised gamma distribution is a versatile distribution for positive
+    continuous data. We use the parametrisation considered by Stacey with shape
+    parameter :math:`q`, location parameter :math:`\mu`, and scale parameter
+    :math:`\sigma`. Then if :math:`\gamma` follows a gamma distribution with shape
+    parameter :math:`a=1/q^2`, the random variable
 
     .. math ::
        x = \exp\left(\mu + \frac{\sigma}{q}\log\left(q^2\gamma\right)\right)
 
     follows a generalised gamma distribution.
 
-    The generalised gamma distribution includes the regular gamma distribution (:math:`\sigma=q`),
-    Weibull distribution (:math:`q=1`), and lognormal distribution (:math:`q=0`) as special cases.
-    The model can be restricted to a particular distribution using the :code:`parametrisation`
-    parameter.
+    The generalised gamma distribution includes the regular gamma distribution
+    (:math:`\sigma=q`), Weibull distribution (:math:`q=1`), and lognormal distribution
+    (:math:`q=0`) as special cases. The model can be restricted to a particular
+    distribution using the :code:`parametrisation` parameter.
     """
 
     def __init__(
@@ -646,8 +651,8 @@ def __init__(
         if self.inflated:
             self.parameters["rho"] = ()
         if self.temporal == Profile.GAMMA:
-            # Parameters for the gamma distribution. Shape and scale as usual. Offset is just an
-            # overall shift.
+            # Parameters for the gamma distribution. Shape and scale as usual. Offset is
+            # just an overall shift.
             self.parameters.update(
                 {
                     "profile_offset": (),
@@ -734,8 +739,8 @@ def sample_params(self, values):
     @_augment_values
     def _evaluate_sample_log_likelihood(self, values, data, i: int = None):
         """
-        Evaluate the log likelihood for each sample conditional on all model parameters, assuming
-        that all patients shed virus.
+        Evaluate the log likelihood for each sample conditional on all model parameters,
+        assuming that all patients shed virus.
 
         Parameters
         ----------
@@ -773,8 +778,8 @@ def _evaluate_sample_log_likelihood(self, values, data, i: int = None):
     @_augment_values
     def _evaluate_patient_log_likelihood(self, values, data):
         """
-        Evaluate the log likelihood for each patient conditional on all model parameters, assuming
-        that all patients shed virus.
+        Evaluate the log likelihood for each patient conditional on all model
+        parameters, assuming that all patients shed virus.
         """
         result = self._evaluate_sample_log_likelihood(values, data)
         return np.bincount(data["idx"], result, minlength=data["num_patients"])
@@ -804,8 +809,8 @@ def evaluate_marginal_log_likelihood(
         self, values, data, n=1000, eps=1e-6, **kwargs
     ):
         """
-        Evaluate the log likelihood of the observed data marginalised with respect to group-level
-        parameters but conditional on hyperparameters.
+        Evaluate the log likelihood of the observed data marginalised with respect to
+        group-level parameters but conditional on hyperparameters.
 
         Parameters
         ----------
@@ -814,8 +819,8 @@ def evaluate_marginal_log_likelihood(
         data : dict
             Data from which the posterior samples were inferred.
         n : int or str
-            Number of samples to use if simulation is required to evaluate the likelihood or `scipy`
-            for numerical integration.
+            Number of samples to use if simulation is required to evaluate the
+            likelihood or `scipy` for numerical integration.
         eps: float
             Value to clip the patient mean at to avoid underflows.
 
@@ -837,8 +842,8 @@ def evaluate_marginal_log_likelihood(
                 ),
                 eps,
             )
-            # Evaluate the sample log likelihood and marginalise with respect to the patient-level
-            # attributes
+            # Evaluate the sample log likelihood and marginalise with respect to the
+            # patient-level attributes.
             sample_likelihood = self._evaluate_sample_log_likelihood(values, data)
             sample_likelihood = logmeanexp(sample_likelihood, axis=0)
 
@@ -881,9 +886,10 @@ def _func(x, patient):
         if not self.inflated:
             return patient_likelihood
 
-        # Patients that have all-negative samples may be non-shedders. So the data are either
-        # generated by having some latent indicator z==0 or z==1 but the samples are too small to be
-        # above the LOQ. So we need to evaluate the mixture distribution.
+        # Patients that have all-negative samples may be non-shedders. So the data are
+        # either generated by having some latent indicator z==0 or z==1 but the samples
+        # are too small to be above the LOQ. So we need to evaluate the mixture
+        # distribution.
         all_negative = data["num_positives_by_patient"] == 0
         patient_likelihood = np.where(
             all_negative,
@@ -948,9 +954,9 @@ def simulate(self, values, data, simulation_mode):
         data : dict
             Data from which the posterior samples were inferred.
         simulation_mode : SimulationMode
-            Whether to simulate only the lowest level of the hierarchical model (e.g. generate new
-            data from existing groups) or simulate the entire hierarchy (e.g. generate new data from
-            new groups).
+            Whether to simulate only the lowest level of the hierarchical model (e.g.
+            generate new data from existing groups) or simulate the entire hierarchy
+            (e.g. generate new data from new groups).
 
         Returns
         -------