Skip to content

Commit

Permalink
JP-1136: Compute scaling for WFSS background subtraction using error-…
Browse files Browse the repository at this point in the history
…weighted mean (#8990)
  • Loading branch information
emolter authored Dec 17, 2024
1 parent 5a914c6 commit fc41e18
Show file tree
Hide file tree
Showing 9 changed files with 700 additions and 415 deletions.
1 change: 1 addition & 0 deletions changes/8990.background.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Compute scaling for WFSS background subtraction using error-weighted mean
18 changes: 18 additions & 0 deletions docs/jwst/background_step/arguments.rst
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,21 @@ control the sigma clipping, and are passed as arguments to the astropy
Sets the minimum (faintest) magnitude limit to use when selecting sources
from the WFSS source catalog, based on the value of `isophotal_abmag` in the
source catalog. Defaults to ``None``.

``--wfss_maxiter``
Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
Sets the maximum number of iterations allowed for iterative outlier rejection
during determination of the reference background scaling factor. Defaults to 5.

``--wfss_rms_stop``
Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
If the percentage difference in the RMS of the background-subtracted image
between iterations is smaller than this value, stop the iterative outlier
rejection process.
Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``.

``--wfss_outlier_percent``
Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures.
Sets the percentile of outliers in the data to reject on both the low and high end
per iteration during determination of the reference background scaling factor
Defaults to 1, i.e., keep the middle 98 percent of the data each iteration.
19 changes: 9 additions & 10 deletions docs/jwst/background_step/description.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,15 @@ For Wide-Field Slitless Spectroscopy expsoures (NIS_WFSS and NRC_WFSS),
a background reference image is subtracted from the target exposure.
Before being subtracted, the background reference image is scaled to match the
signal level of the WFSS image within background (source-free) regions of the
image.
image. The scaling factor is determined based on the variance-weighted mean
of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``.
This factor is equivalent to solving for the scaling constant applied to the
reference background that gives the maximum likelihood of matching
the science data.
Outliers are rejected iteratively during determination of the scaling factor
in order to avoid biasing the scaling factor based on outliers. The iterative
rejection process is controlled by the
``wfss_outlier_percent``, ``wfss_rms_stop``, and ``wfss_maxiter`` step arguments.

The locations of source spectra are determined from a source catalog (specified
by the primary header keyword SCATFILE), in conjunction with a reference file
Expand All @@ -99,15 +107,6 @@ abmag of the source catalog objects used to define the background regions.
The default is to use all source catalog entries that result in a spectrum
falling within the WFSS image.

Robust mean values are obtained for the background regions in the WFSS image and
for the same regions in the background reference image, and the ratio of those two
mean values is used to scale the background reference image. The robust mean is
computed by excluding the lowest 25% and highest 25% of the data (using the
numpy.percentile function), and taking a simple arithmetic mean of the
remaining values. Note that NaN values (if any) in the background
reference image are currently set to zero. If there are a lot of NaNs,
it may be that more than 25% of the lowest values will need to be excluded.

For both background methods the output results are always returned in a new
data model, leaving the original input model unchanged.

Expand Down
26 changes: 19 additions & 7 deletions jwst/background/background_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from stdatamodels.jwst import datamodels

from ..stpipe import Step
from . import background_sub
from .background_sub import background_sub
from .background_sub_wfss import subtract_wfss_bkg
import numpy as np
__all__ = ["BackgroundStep"]

Expand All @@ -19,6 +20,9 @@ class BackgroundStep(Step):
sigma = float(default=3.0) # Clipping threshold
maxiters = integer(default=None) # Number of clipping iterations
wfss_mmag_extract = float(default=None) # WFSS minimum abmag to extract
wfss_maxiter = integer(default=5) # WFSS iterative outlier rejection max iterations
wfss_rms_stop = float(default=0) # WFSS iterative outlier rejection RMS improvement threshold (percent)
wfss_outlier_percent = float(default=1) # WFSS outlier percentile to reject per iteration
"""

# These reference files are only used for WFSS/GRISM data.
Expand Down Expand Up @@ -60,8 +64,16 @@ def process(self, input, bkg_list):
wlrange_name)

# Do the background subtraction for WFSS/GRISM data
result = background_sub.subtract_wfss_bkg(
input_model, bkg_name, wlrange_name, self.wfss_mmag_extract)
rescaler_kwargs = {"p": self.wfss_outlier_percent,
"maxiter": self.wfss_maxiter,
"delta_rms_thresh": self.wfss_rms_stop/100,
}
result = subtract_wfss_bkg(
input_model,
bkg_name,
wlrange_name,
self.wfss_mmag_extract,
rescaler_kwargs=rescaler_kwargs)
if result is None:
result = input_model
result.meta.cal_step.back_sub = 'SKIPPED'
Expand All @@ -88,10 +100,10 @@ def process(self, input, bkg_list):
break
# Do the background subtraction
if do_sub:
bkg_model, result = background_sub.background_sub(input_model,
bkg_list,
self.sigma,
self.maxiters)
bkg_model, result = background_sub(input_model,
bkg_list,
self.sigma,
self.maxiters)
result.meta.cal_step.back_sub = 'COMPLETE'
if self.save_combined_background:
comb_bkg_path = self.save_model(bkg_model, suffix=self.bkg_suffix, force=True)
Expand Down
194 changes: 0 additions & 194 deletions jwst/background/background_sub.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
import copy
import math
import numpy as np
import warnings

from stdatamodels.jwst import datamodels
from stdatamodels.jwst.datamodels.dqflags import pixel

from . import subtract_images
from ..assign_wcs.util import create_grism_bbox
from astropy.stats import sigma_clip
from astropy.utils.exceptions import AstropyUserWarning

Expand Down Expand Up @@ -270,194 +267,3 @@ def average_background(input_model, bkg_list, sigma, maxiters):
avg_bkg.err = (np.sqrt(merr.sum(axis=0)) / (num_bkg - merr.mask.sum(axis=0))).data

return avg_bkg


def sufficient_background_pixels(dq_array, bkg_mask, min_pixels=100):
"""Count number of good pixels for background use.
Check DQ flags of pixels selected for bkg use - XOR the DQ values with
the DO_NOT_USE flag to flip the DO_NOT_USE bit. Then count the number
of pixels that AND with the DO_NOT_USE flag, i.e. initially did not have
the DO_NOT_USE bit set.
"""
return np.count_nonzero((dq_array[bkg_mask]
^ pixel['DO_NOT_USE'])
& pixel['DO_NOT_USE']
) > min_pixels


def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=None):
"""Scale and subtract a background reference image from WFSS/GRISM data.
Parameters
----------
input_model : JWST data model
input target exposure data model
bkg_filename : str
name of master background file for WFSS/GRISM
wl_range_name : str
name of wavelengthrange reference file
mmag_extract : float
minimum abmag of grism objects to extract
Returns
-------
result : JWST data model
background-subtracted target data model
"""

bkg_ref = datamodels.open(bkg_filename)

if hasattr(input_model.meta, "source_catalog"):
got_catalog = True
else:
log.warning("No source_catalog found in input.meta.")
got_catalog = False

# If there are NaNs, we have to replace them with something harmless.
bkg_ref = no_NaN(bkg_ref)

# Create a mask from the source catalog, True where there are no sources,
# i.e. in regions we can use as background.
if got_catalog:
bkg_mask = mask_from_source_cat(input_model, wl_range_name, mmag_extract)
if not sufficient_background_pixels(input_model.dq, bkg_mask):
log.warning("Not enough background pixels to work with.")
log.warning("Step will be SKIPPED.")
return None
else:
bkg_mask = np.ones(input_model.data.shape, dtype=bool)
# Compute the mean values of science image and background reference
# image, including only regions where there are no identified sources.
# Exclude pixel values in the lower and upper 25% of the histogram.
lowlim = 25.
highlim = 75.
sci_mean = robust_mean(input_model.data[bkg_mask],
lowlim=lowlim, highlim=highlim)
bkg_mean = robust_mean(bkg_ref.data[bkg_mask],
lowlim=lowlim, highlim=highlim)

log.debug("mean of [{}, {}] percentile grism image = {}"
.format(lowlim, highlim, sci_mean))
log.debug("mean of [{}, {}] percentile background image = {}"
.format(lowlim, highlim, bkg_mean))

result = input_model.copy()
if bkg_mean != 0.:
subtract_this = (sci_mean / bkg_mean) * bkg_ref.data
result.data = input_model.data - subtract_this
log.info(f"Average of background image subtracted = {subtract_this.mean(dtype=float)}")
else:
log.warning("Background image has zero mean; nothing will be subtracted.")
result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq)

bkg_ref.close()

return result


def no_NaN(model, fill_value=0.):
"""Replace NaNs with a harmless value.
Parameters
----------
model : JWST data model
Reference file model.
fill_value : float
NaNs will be replaced with this value.
Returns
-------
result : JWST data model
Reference file model without NaNs in data array.
"""

mask = np.isnan(model.data)
if mask.sum(dtype=np.intp) == 0:
return model
else:
temp = model.copy()
temp.data[mask] = fill_value
return temp


def mask_from_source_cat(input_model, wl_range_name, mmag_extract=None):
"""Create a mask that is False within bounding boxes of sources.
Parameters
----------
input_model : JWST data model
input target exposure data model
wl_range_name : str
Name of the wavelengthrange reference file
mmag_extract : float
Minimum abmag of grism objects to extract
Returns
-------
bkg_mask : ndarray
Boolean mask: True for background, False for pixels that are
inside at least one of the source regions defined in the source
catalog.
"""

shape = input_model.data.shape
bkg_mask = np.ones(shape, dtype=bool)

reference_files = {"wavelengthrange": wl_range_name}
grism_obj_list = create_grism_bbox(input_model, reference_files, mmag_extract)

for obj in grism_obj_list:
order_bounding = obj.order_bounding
for order in order_bounding.keys():
((ymin, ymax), (xmin, xmax)) = order_bounding[order]
xmin = int(math.floor(xmin))
xmax = int(math.ceil(xmax)) + 1 # convert to slice limit
ymin = int(math.floor(ymin))
ymax = int(math.ceil(ymax)) + 1
xmin = max(xmin, 0)
xmax = min(xmax, shape[-1])
ymin = max(ymin, 0)
ymax = min(ymax, shape[-2])
bkg_mask[..., ymin:ymax, xmin:xmax] = False

return bkg_mask


def robust_mean(x, lowlim=25., highlim=75.):
"""Compute a mean value, excluding outliers.
Parameters
----------
x : ndarray
The array for which we want a mean value.
lowlim : float
The lower `lowlim` percent of the data will not be used when
computing the mean.
highlim : float
The upper `highlim` percent of the data will not be used when
computing the mean.
Returns
-------
mean_value : float
The mean of `x`, excluding data outside `lowlim` to `highlim`
percentile limits.
"""

nan_mask = np.isnan(x)
cleaned_x = x[~nan_mask]
limits = np.percentile(cleaned_x, (lowlim, highlim))
mask = np.logical_and(cleaned_x >= limits[0], cleaned_x <= limits[1])

mean_value = np.mean(cleaned_x[mask], dtype=float)

return mean_value
Loading

0 comments on commit fc41e18

Please sign in to comment.