Merge pull request #350 from lincc-frameworks/delucchi/docs

Tidy up some docstrings.
lincc-frameworks · Jan 17, 2024 · a3de1be · a3de1be
2 parents f69be16 + e4c87f7
commit a3de1be
Show file tree

Hide file tree

Showing 7 changed files with 137 additions and 103 deletions.
diff --git a/.gitignore b/.gitignore
@@ -71,6 +71,7 @@ instance/
 
 # Sphinx documentation
 docs/_build/
+docs/autoapi/
 _readthedocs/
 
 # PyBuilder

diff --git a/docs/Makefile b/docs/Makefile
@@ -4,7 +4,7 @@
 # You can set these variables from the command line, and also
 # from the environment for the first two.
 SPHINXOPTS    ?= -T -E -d _build/doctrees -D language=en
-EXCLUDENB     ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints"
+EXCLUDENB     ?= -D exclude_patterns="notebooks/*","_build","**.ipynb_checkpoints","**.ipynb"
 SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     = .
 BUILDDIR      = ../_readthedocs/

diff --git a/src/tape/analysis/base.py b/src/tape/analysis/base.py
@@ -68,15 +68,14 @@ def meta(self, ens: "Ensemble"):
 
     @abstractmethod
     def on(self, ens: "Ensemble") -> List[str]:
-        """
-        Return the columns to group source table by.
+        """Return the columns to group source table by.
 
         Parameters
         ----------
         ens : Ensemble
             The ensemble object.
 
-        Returns:
+        Returns
         --------
         List[str]
             The column names to group by. Typically, `[ens._id_col]`.

diff --git a/src/tape/analysis/stetsonj.py b/src/tape/analysis/stetsonj.py
@@ -51,7 +51,7 @@ def __call__(
         stetsonJ : `dict`
             StetsonJ statistic for each of input bands.
 
-        Notes
+        Note
         ----------
         In case that no value for `band_to_calc` is passed, the function is
         executed on all available bands in `band`.
@@ -113,7 +113,7 @@ def _stetson_J_single(fluxes, errors):
     .. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
     Parameters for Cepheid Variables", PASP, 108, 851S, 1996
 
-    Notes
+    Note
     ----------
     Taken from
     https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py
@@ -168,7 +168,7 @@ def _stetson_J_mean(values, errors, mean=None, alpha=2.0, beta=2.0, n_iter=20, t
     .. [1] Stetson, P. B., "On the Automatic Determination of Light-Curve
     Parameters for Cepheid Variables", PASP, 108, 851S, 1996
 
-    Notes
+    Note
     ----------
     Taken from
     https://github.com/lsst/meas_base/blob/main/python/lsst/meas/base/diaCalculationPlugins.py

diff --git a/src/tape/ensemble.py b/src/tape/ensemble.py
@@ -104,18 +104,19 @@ def add_frame(self, frame, label):
 
         Parameters
         ----------
-        frame: `tape.ensemble.EnsembleFrame`
+        frame: `tape.ensemble_frame.EnsembleFrame`
             The frame object for the Ensemble to track.
         label: `str`
-        |   The label for the Ensemble to use to track the frame.
+            The label for the Ensemble to use to track the frame.
 
         Returns
         -------
-        self: `Ensemble`
+        Ensemble
 
         Raises
         ------
-        ValueError if the label is "source", "object", or already tracked by the Ensemble.
+        ValueError
+            if the label is "source", "object", or already tracked by the Ensemble.
         """
         if label == SOURCE_FRAME_LABEL or label == OBJECT_FRAME_LABEL:
             raise ValueError(f"Unable to add frame with reserved label " f"'{label}'")
@@ -138,12 +139,13 @@ def update_frame(self, frame):
 
         Returns
         -------
-        self: `Ensemble`
+        Ensemble
 
         Raises
         ------
-        ValueError if the `frame.label` is unpopulated, or if the frame is not a SourceFrame or ObjectFrame
-        but uses the reserved labels.
+        ValueError
+            if the `frame.label` is unpopulated, or if the frame is not a SourceFrame or ObjectFrame
+            but uses the reserved labels.
         """
         if frame.label is None:
             raise ValueError(f"Unable to update frame with no populated `EnsembleFrame.label`.")
@@ -167,16 +169,18 @@ def drop_frame(self, label):
         Parameters
         ----------
         label: `str`
-        |   The label of the frame to be dropped by the Ensemble.
+            The label of the frame to be dropped by the Ensemble.
 
         Returns
         -------
-        self: `Ensemble`
+        Ensemble
 
         Raises
         ------
-        ValueError if the label is "source", or "object".
-        KeyError if the label is not tracked by the Ensemble.
+        ValueError
+            if the label is "source", or "object".
+        KeyError
+            if the label is not tracked by the Ensemble.
         """
         if label == SOURCE_FRAME_LABEL or label == OBJECT_FRAME_LABEL:
             raise ValueError(f"Unable to drop frame with reserved label " f"'{label}'")
@@ -191,15 +195,16 @@ def select_frame(self, label):
         Parameters
         ----------
         label: `str`
-        |   The label of a frame tracked by the Ensemble to be selected.
+            The label of a frame tracked by the Ensemble to be selected.
 
         Returns
         -------
-        result: `tape.ensemble.EnsembleFrame`
+        tape.ensemble_frame.EnsembleFrame
 
         Raises
         ------
-        KeyError if the label is not tracked by the Ensemble.
+        KeyError
+            if the label is not tracked by the Ensemble.
         """
         if label not in self.frames:
             raise KeyError(
@@ -229,7 +234,8 @@ def frame_info(self, labels=None, verbose=True, memory_usage=True, **kwargs):
 
         Raises
         ------
-        KeyError if a label in labels is not tracked by the Ensemble.
+        KeyError
+            if a label in labels is not tracked by the Ensemble.
         """
         if labels is None:
             labels = self.frames.keys()
@@ -265,7 +271,7 @@ def insert_sources(
     ):
         """Manually insert sources into the ensemble.
 
-        Requires, at a minimum, the object’s ID and the band, timestamp,
+        Requires, at a minimum, the object's ID and the band, timestamp,
         and flux of the observation.
 
         Note
@@ -364,6 +370,7 @@ def info(self, verbose=True, memory_usage=True, **kwargs):
         memory_usage: `bool`, optional
             Specifies whether total memory usage of the DataFrame elements
             (including the index) should be displayed.
+
         Returns
         ----------
         None
@@ -377,8 +384,7 @@ def info(self, verbose=True, memory_usage=True, **kwargs):
         self.source.info(verbose=verbose, memory_usage=memory_usage, **kwargs)
 
     def check_sorted(self, table="object"):
-        """Checks to see if an Ensemble Dataframe is sorted (increasing) on
-        the index.
+        """Checks to see if an Ensemble DataFrame is sorted (increasing) on the index.
 
         Parameters
         ----------
@@ -387,8 +393,8 @@ def check_sorted(self, table="object"):
 
         Returns
         -------
-        A boolean value indicating whether the index is sorted (True)
-        or not (False)
+        boolean
+            indicating whether the index is sorted (True) or not (False)
         """
         if table == "object":
             idx = self.object.index
@@ -412,10 +418,10 @@ def check_lightcurve_cohesion(self):
 
         Returns
         -------
-        A boolean value indicating whether the sources tied to a given object
-        are only found in a single partition (True), or if they are split
-        across multiple partitions (False)
-
+        boolean
+            indicates whether the sources tied to a given object are only found
+            in a single partition (True), or if they are split across multiple
+            partitions (False)
         """
         idx = self.source.index
         counts = idx.map_partitions(lambda a: Counter(a.unique())).compute()
@@ -440,8 +446,9 @@ def compute(self, table=None, **kwargs):
 
         Returns
         -------
-        A single pandas data frame for the specified table or a tuple of (object, source)
-        data frames.
+        `pd.DataFrame`
+            A single pandas data frame for the specified table or a tuple of
+            (object, source) data frames.
         """
         if table:
             self._lazy_sync_tables(table)
@@ -559,14 +566,17 @@ def query(self, expr, table="object"):
 
         Examples
         --------
-        # Keep sources with flux above 100.0:
-        ens.query("flux > 100", table="source")
+        Keep sources with flux above 100.0::
 
-        # Keep sources in the green band:
-        ens.query("band_col_name == 'g'", table="source")
+            ens.query("flux > 100", table="source")
 
-        # Filtering on the flux column without knowing its name:
-        ens.query(f"{ens._flux_col} > 100", table="source")
+        Keep sources in the green band::
+
+            ens.query("band_col_name == 'g'", table="source")
+
+        Filtering on the flux column without knowing its name::
+
+            ens.query(f"{ens._flux_col} > 100", table="source")
         """
         self._lazy_sync_tables(table)
         if table == "object":
@@ -622,11 +632,13 @@ def assign(self, table="object", temporary=False, **kwargs):
 
         Examples
         --------
-        # Direct assignment of my_series to a column named "new_column".
-        ens.assign(table="object", new_column=my_series)
+        Direct assignment of my_series to a column named "new_column"::
+
+            ens.assign(table="object", new_column=my_series)
+
+        Subtract twice the value in "err" from the value in "flux"::
 
-        # Subtract the value in "err" from the value in "flux".
-        ens.assign(table="source", lower_bnd=lambda x: x["flux"] - 2.0 * x["err"])
+            ens.assign(table="source", lower_bnd=lambda x: x["flux"] - 2.0 * x["err"])
         """
         self._lazy_sync_tables(table)
 
@@ -869,12 +881,12 @@ def bin_sources(
         Notes
         -----
         * This should only be used for slowly varying sources where we can
-        treat the source as constant within `time_window`.
+          treat the source as constant within `time_window`.
 
         * As a default the function only aggregates and keeps the id, band,
-        time, flux, and flux error columns. Additional columns can be preserved
-        by providing the mapping of column name to aggregation function with the
-        `additional_cols` parameter.
+          time, flux, and flux error columns. Additional columns can be preserved
+          by providing the mapping of column name to aggregation function with the
+          `additional_cols` parameter.
         """
         self._lazy_sync_tables(table="source")
 
@@ -991,31 +1003,28 @@ def batch(self, func, *args, meta=None, by_band=False, use_map=True, on=None, la
 
         Examples
         --------
-        Run a TAPE function on the ensemble:
-        ```
-        from tape.analysis.stetsonj import calc_stetson_J
-        ens = Ensemble().from_dataset('rrlyr82')
-        ensemble.batch(calc_stetson_J, band_to_calc='i')
-        ```
-
-        Run a light-curve function on the ensemble:
-        ```
-        from light_curve import EtaE
-        ens.batch(EtaE(), band_to_calc='g')
-        ```
-
-        Run a custom function on the ensemble:
-        ```
-        def s2n_inter_quartile_range(flux, err):
-             first, third = np.quantile(flux / err, [0.25, 0.75])
-             return third - first
-
-        ens.batch(s2n_inter_quartile_range, ens._flux_col, ens._err_col)
-        ```
-        Or even a numpy built-in function:
-        ```
-        amplitudes = ens.batch(np.ptp, ens._flux_col)
-        ```
+        Run a TAPE function on the ensemble::
+
+            from tape.analysis.stetsonj import calc_stetson_J
+            ens = Ensemble().from_dataset('rrlyr82')
+            ens.batch(calc_stetson_J, band_to_calc='i')
+
+        Run a light-curve function on the ensemble::
+
+            from light_curve import EtaE
+            ens.batch(EtaE(), band_to_calc='g')
+
+        Run a custom function on the ensemble::
+
+            def s2n_inter_quartile_range(flux, err):
+                first, third = np.quantile(flux / err, [0.25, 0.75])
+                return third - first
+
+            ens.batch(s2n_inter_quartile_range, ens._flux_col, ens._err_col)
+
+        Or even a numpy built-in function::
+
+            amplitudes = ens.batch(np.ptp, ens._flux_col)
         """
 
         self._lazy_sync_tables(table="all")
@@ -1507,6 +1516,7 @@ def from_dask_dataframe(
 
     def from_hipscat(self, dir, source_subdir="source", object_subdir="object", column_mapper=None, **kwargs):
         """Read in parquet files from a hipscat-formatted directory structure.
+
         Parameters
         ----------
         dir: `str`
@@ -1900,7 +1910,7 @@ def _lazy_sync_tables_from_frame(self, frame):
 
         Parameters
         ----------
-        frame: `tape.EnsembleFrame`
+        frame: `tape.ensemble_frame.EnsembleFrame`
             The frame being modified. Only an `ObjectFrame` or
             `SourceFrame` tracked by this `Ensemble` may trigger
             a sync.
@@ -2144,7 +2154,7 @@ def sf2(self, sf_method="basic", argument_container=None, use_map=True):
         result : `pandas.DataFrame`
             Structure function squared for each of input bands.
 
-        Notes
+        Note
         ----------
         In case that no value for `band_to_calc` is passed, the function is
         executed on all available bands in `band`.
@@ -2186,7 +2196,7 @@ def _translate_meta(self, meta):
         Returns
         ----------
         result : `ensemble.TapeFrame` or `ensemble.TapeSeries`
-            The appropriate meta for Dask producing an `Ensemble.EnsembleFrame` or
+            The appropriate meta for Dask producing a `tape.ensemble_frame.EnsembleFrame` or
             `Ensemble.EnsembleSeries` respectively
         """
         if isinstance(meta, TapeFrame) or isinstance(meta, TapeSeries):