JP-1136: Compute scaling for WFSS background subtraction using error-…

…weighted mean (#8990)
spacetelescope · Dec 17, 2024 · fc41e18 · fc41e18
1 parent 5a914c6
commit fc41e18
Show file tree

Hide file tree

Showing 9 changed files with 700 additions and 415 deletions.
diff --git a/changes/8990.background.rst b/changes/8990.background.rst
@@ -0,0 +1 @@
+Compute scaling for WFSS background subtraction using error-weighted mean
diff --git a/docs/jwst/background_step/arguments.rst b/docs/jwst/background_step/arguments.rst
@@ -27,3 +27,21 @@ control the sigma clipping, and are passed as arguments to the astropy
   Sets the minimum (faintest) magnitude limit to use when selecting sources
   from the WFSS source catalog, based on the value of `isophotal_abmag` in the
   source catalog. Defaults to ``None``.
+
+``--wfss_maxiter``
+  Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
+  Sets the maximum number of iterations allowed for iterative outlier rejection
+  during determination of the reference background scaling factor. Defaults to 5.
+
+``--wfss_rms_stop``
+  Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
+  If the percentage difference in the RMS of the background-subtracted image
+  between iterations is smaller than this value, stop the iterative outlier
+  rejection process.
+  Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``.
+
+``--wfss_outlier_percent``
+  Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
+  Sets the percentile of outliers in the data to reject on both the low and high end
+  per iteration during determination of the reference background scaling factor
+  Defaults to 1, i.e., keep the middle 98 percent of the data each iteration.
diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst
@@ -87,7 +87,15 @@ For Wide-Field Slitless Spectroscopy expsoures (NIS_WFSS and NRC_WFSS),
 a background reference image is subtracted from the target exposure.
 Before being subtracted, the background reference image is scaled to match the
 signal level of the WFSS image within background (source-free) regions of the
-image. 
+image. The scaling factor is determined based on the variance-weighted mean
+of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``.
+This factor is equivalent to solving for the scaling constant applied to the
+reference background that gives the maximum likelihood of matching 
+the science data.
+Outliers are rejected iteratively during determination of the scaling factor
+in order to avoid biasing the scaling factor based on outliers. The iterative
+rejection process is controlled by the
+``wfss_outlier_percent``, ``wfss_rms_stop``, and ``wfss_maxiter`` step arguments.
 
 The locations of source spectra are determined from a source catalog (specified
 by the primary header keyword SCATFILE), in conjunction with a reference file
@@ -99,15 +107,6 @@ abmag of the source catalog objects used to define the background regions.
 The default is to use all source catalog entries that result in a spectrum
 falling within the WFSS image.
 
-Robust mean values are obtained for the background regions in the WFSS image and
-for the same regions in the background reference image, and the ratio of those two
-mean values is used to scale the background reference image. The robust mean is
-computed by excluding the lowest 25% and highest 25% of the data (using the
-numpy.percentile function), and taking a simple arithmetic mean of the
-remaining values.  Note that NaN values (if any) in the background
-reference image are currently set to zero.  If there are a lot of NaNs,
-it may be that more than 25% of the lowest values will need to be excluded.
-
 For both background methods the output results are always returned in a new
 data model, leaving the original input model unchanged.
 

diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py
@@ -2,7 +2,8 @@
 from stdatamodels.jwst import datamodels
 
 from ..stpipe import Step
-from . import background_sub
+from .background_sub import background_sub
+from .background_sub_wfss import subtract_wfss_bkg
 import numpy as np
 __all__ = ["BackgroundStep"]
 
@@ -19,6 +20,9 @@ class BackgroundStep(Step):
         sigma = float(default=3.0)  # Clipping threshold
         maxiters = integer(default=None)  # Number of clipping iterations
         wfss_mmag_extract = float(default=None)  # WFSS minimum abmag to extract
+        wfss_maxiter = integer(default=5)  # WFSS iterative outlier rejection max iterations
+        wfss_rms_stop = float(default=0)  # WFSS iterative outlier rejection RMS improvement threshold (percent)
+        wfss_outlier_percent = float(default=1)  # WFSS outlier percentile to reject per iteration
     """
 
     # These reference files are only used for WFSS/GRISM data.
@@ -60,8 +64,16 @@ def process(self, input, bkg_list):
                               wlrange_name)
 
                 # Do the background subtraction for WFSS/GRISM data
-                result = background_sub.subtract_wfss_bkg(
-                    input_model, bkg_name, wlrange_name, self.wfss_mmag_extract)
+                rescaler_kwargs = {"p": self.wfss_outlier_percent,
+                                   "maxiter": self.wfss_maxiter,
+                                   "delta_rms_thresh": self.wfss_rms_stop/100,
+                                   }
+                result = subtract_wfss_bkg(
+                    input_model,
+                    bkg_name,
+                    wlrange_name,
+                    self.wfss_mmag_extract,
+                    rescaler_kwargs=rescaler_kwargs)
                 if result is None:
                     result = input_model
                     result.meta.cal_step.back_sub = 'SKIPPED'
@@ -88,10 +100,10 @@ def process(self, input, bkg_list):
                                 break
                 # Do the background subtraction
                 if do_sub:
-                    bkg_model, result = background_sub.background_sub(input_model,
-                                                                      bkg_list,
-                                                                      self.sigma,
-                                                                      self.maxiters)
+                    bkg_model, result = background_sub(input_model,
+                                                       bkg_list,
+                                                       self.sigma,
+                                                       self.maxiters)
                     result.meta.cal_step.back_sub = 'COMPLETE'
                     if self.save_combined_background:
                         comb_bkg_path = self.save_model(bkg_model, suffix=self.bkg_suffix, force=True)

diff --git a/jwst/background/background_sub.py b/jwst/background/background_sub.py
@@ -1,13 +1,10 @@
 import copy
-import math
 import numpy as np
 import warnings
 
 from stdatamodels.jwst import datamodels
-from stdatamodels.jwst.datamodels.dqflags import pixel
 
 from . import subtract_images
-from ..assign_wcs.util import create_grism_bbox
 from astropy.stats import sigma_clip
 from astropy.utils.exceptions import AstropyUserWarning
 
@@ -270,194 +267,3 @@ def average_background(input_model, bkg_list, sigma, maxiters):
     avg_bkg.err = (np.sqrt(merr.sum(axis=0)) / (num_bkg - merr.mask.sum(axis=0))).data
 
     return avg_bkg
-
-
-def sufficient_background_pixels(dq_array, bkg_mask, min_pixels=100):
-    """Count number of good pixels for background use.
-
-    Check DQ flags of pixels selected for bkg use - XOR the DQ values with
-    the DO_NOT_USE flag to flip the DO_NOT_USE bit. Then count the number
-    of pixels that AND with the DO_NOT_USE flag, i.e. initially did not have
-    the DO_NOT_USE bit set.
-    """
-    return np.count_nonzero((dq_array[bkg_mask]
-                            ^ pixel['DO_NOT_USE'])
-                            & pixel['DO_NOT_USE']
-                            ) > min_pixels
-
-
-def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=None):
-    """Scale and subtract a background reference image from WFSS/GRISM data.
-
-    Parameters
-    ----------
-    input_model : JWST data model
-        input target exposure data model
-
-    bkg_filename : str
-        name of master background file for WFSS/GRISM
-
-    wl_range_name : str
-        name of wavelengthrange reference file
-
-    mmag_extract : float
-        minimum abmag of grism objects to extract
-
-    Returns
-    -------
-    result : JWST data model
-        background-subtracted target data model
-    """
-
-    bkg_ref = datamodels.open(bkg_filename)
-
-    if hasattr(input_model.meta, "source_catalog"):
-        got_catalog = True
-    else:
-        log.warning("No source_catalog found in input.meta.")
-        got_catalog = False
-
-    # If there are NaNs, we have to replace them with something harmless.
-    bkg_ref = no_NaN(bkg_ref)
-
-    # Create a mask from the source catalog, True where there are no sources,
-    # i.e. in regions we can use as background.
-    if got_catalog:
-        bkg_mask = mask_from_source_cat(input_model, wl_range_name, mmag_extract)
-        if not sufficient_background_pixels(input_model.dq, bkg_mask):
-            log.warning("Not enough background pixels to work with.")
-            log.warning("Step will be SKIPPED.")
-            return None
-    else:
-        bkg_mask = np.ones(input_model.data.shape, dtype=bool)
-    # Compute the mean values of science image and background reference
-    # image, including only regions where there are no identified sources.
-    # Exclude pixel values in the lower and upper 25% of the histogram.
-    lowlim = 25.
-    highlim = 75.
-    sci_mean = robust_mean(input_model.data[bkg_mask],
-                           lowlim=lowlim, highlim=highlim)
-    bkg_mean = robust_mean(bkg_ref.data[bkg_mask],
-                           lowlim=lowlim, highlim=highlim)
-
-    log.debug("mean of [{}, {}] percentile grism image = {}"
-              .format(lowlim, highlim, sci_mean))
-    log.debug("mean of [{}, {}] percentile background image = {}"
-              .format(lowlim, highlim, bkg_mean))
-
-    result = input_model.copy()
-    if bkg_mean != 0.:
-        subtract_this = (sci_mean / bkg_mean) * bkg_ref.data
-        result.data = input_model.data - subtract_this
-        log.info(f"Average of background image subtracted = {subtract_this.mean(dtype=float)}")
-    else:
-        log.warning("Background image has zero mean; nothing will be subtracted.")
-    result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq)
-
-    bkg_ref.close()
-
-    return result
-
-
-def no_NaN(model, fill_value=0.):
-    """Replace NaNs with a harmless value.
-
-    Parameters
-    ----------
-    model : JWST data model
-        Reference file model.
-
-    fill_value : float
-        NaNs will be replaced with this value.
-
-    Returns
-    -------
-    result : JWST data model
-        Reference file model without NaNs in data array.
-    """
-
-    mask = np.isnan(model.data)
-    if mask.sum(dtype=np.intp) == 0:
-        return model
-    else:
-        temp = model.copy()
-        temp.data[mask] = fill_value
-        return temp
-
-
-def mask_from_source_cat(input_model, wl_range_name, mmag_extract=None):
-    """Create a mask that is False within bounding boxes of sources.
-
-    Parameters
-    ----------
-    input_model : JWST data model
-        input target exposure data model
-
-    wl_range_name : str
-        Name of the wavelengthrange reference file
-
-    mmag_extract : float
-        Minimum abmag of grism objects to extract
-
-    Returns
-    -------
-    bkg_mask : ndarray
-        Boolean mask:  True for background, False for pixels that are
-        inside at least one of the source regions defined in the source
-        catalog.
-    """
-
-    shape = input_model.data.shape
-    bkg_mask = np.ones(shape, dtype=bool)
-
-    reference_files = {"wavelengthrange": wl_range_name}
-    grism_obj_list = create_grism_bbox(input_model, reference_files, mmag_extract)
-
-    for obj in grism_obj_list:
-        order_bounding = obj.order_bounding
-        for order in order_bounding.keys():
-            ((ymin, ymax), (xmin, xmax)) = order_bounding[order]
-            xmin = int(math.floor(xmin))
-            xmax = int(math.ceil(xmax)) + 1  # convert to slice limit
-            ymin = int(math.floor(ymin))
-            ymax = int(math.ceil(ymax)) + 1
-            xmin = max(xmin, 0)
-            xmax = min(xmax, shape[-1])
-            ymin = max(ymin, 0)
-            ymax = min(ymax, shape[-2])
-            bkg_mask[..., ymin:ymax, xmin:xmax] = False
-
-    return bkg_mask
-
-
-def robust_mean(x, lowlim=25., highlim=75.):
-    """Compute a mean value, excluding outliers.
-
-    Parameters
-    ----------
-    x : ndarray
-        The array for which we want a mean value.
-
-    lowlim : float
-        The lower `lowlim` percent of the data will not be used when
-        computing the mean.
-
-    highlim : float
-        The upper `highlim` percent of the data will not be used when
-        computing the mean.
-
-    Returns
-    -------
-    mean_value : float
-        The mean of `x`, excluding data outside `lowlim` to `highlim`
-        percentile limits.
-    """
-
-    nan_mask = np.isnan(x)
-    cleaned_x = x[~nan_mask]
-    limits = np.percentile(cleaned_x, (lowlim, highlim))
-    mask = np.logical_and(cleaned_x >= limits[0], cleaned_x <= limits[1])
-
-    mean_value = np.mean(cleaned_x[mask], dtype=float)
-
-    return mean_value
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Compute scaling for WFSS background subtraction using error-weighted mean