From e3025659afa44944c30d3b770c3fbe4c405778bd Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 2 Dec 2024 09:29:30 -0500 Subject: [PATCH 01/19] halfway through attempted fix --- jwst/background/background_sub.py | 123 +++++++++-------------- jwst/background/tests/test_background.py | 68 ++++--------- 2 files changed, 68 insertions(+), 123 deletions(-) diff --git a/jwst/background/background_sub.py b/jwst/background/background_sub.py index 9edd641c23..1a991154e4 100755 --- a/jwst/background/background_sub.py +++ b/jwst/background/background_sub.py @@ -10,6 +10,7 @@ from ..assign_wcs.util import create_grism_bbox from astropy.stats import sigma_clip from astropy.utils.exceptions import AstropyUserWarning +from scipy.stats import norm import logging @@ -317,9 +318,6 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non log.warning("No source_catalog found in input.meta.") got_catalog = False - # If there are NaNs, we have to replace them with something harmless. - bkg_ref = no_NaN(bkg_ref) - # Create a mask from the source catalog, True where there are no sources, # i.e. in regions we can use as background. if got_catalog: @@ -330,28 +328,13 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non return None else: bkg_mask = np.ones(input_model.data.shape, dtype=bool) - # Compute the mean values of science image and background reference - # image, including only regions where there are no identified sources. - # Exclude pixel values in the lower and upper 25% of the histogram. - lowlim = 25. - highlim = 75. - sci_mean = robust_mean(input_model.data[bkg_mask], - lowlim=lowlim, highlim=highlim) - bkg_mean = robust_mean(bkg_ref.data[bkg_mask], - lowlim=lowlim, highlim=highlim) - - log.debug("mean of [{}, {}] percentile grism image = {}" - .format(lowlim, highlim, sci_mean)) - log.debug("mean of [{}, {}] percentile background image = {}" - .format(lowlim, highlim, bkg_mean)) + # compute scaling factor for the reference background image + factor = _err_weighted_mean(input_model.data, input_model.err, bkg_ref.data, ~bkg_mask) result = input_model.copy() - if bkg_mean != 0.: - subtract_this = (sci_mean / bkg_mean) * bkg_ref.data - result.data = input_model.data - subtract_this - log.info(f"Average of background image subtracted = {subtract_this.mean(dtype=float)}") - else: - log.warning("Background image has zero mean; nothing will be subtracted.") + subtract_this = factor * bkg_ref.data + result.data = input_model.data - subtract_this + log.info(f"Average of background image subtracted = {subtract_this.mean(dtype=float)}") result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) bkg_ref.close() @@ -359,30 +342,55 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non return result -def no_NaN(model, fill_value=0.): - """Replace NaNs with a harmless value. +def _apply_percentiles(data, lo, hi): + limits = np.nanpercentile(data, (lo, hi)) + print(limits) + mask = np.logical_and(data < limits[0], data > limits[1]) + return data[~mask] - Parameters - ---------- - model : JWST data model - Reference file model. - fill_value : float - NaNs will be replaced with this value. +def _err_weighted_mean(sci, var, bkg, mask, p_lo=25., p_hi=75.): + """ + Compute scaling factor by which to multiply background image before subtraction. + It is computed as + scale_factor = np.sum(sci*bkg/var) / np.sum(sci*bkg/var) + where sci and bkg have both been sigma-clipped according to p_lo and p_hi + + Parameters + ---------- + sci: ndarray, required + The science array + var: ndarray, required + The variance array + bkg: ndarray, required + The reference model background array + mask: ndarray, required + Mask to remove sources from the data. Should be 1 where BAD + p_lo: float, optional + Lower percentile for sigma clipping + p_hi: float, optional + Upper percentile for sigma clipping + Returns ------- - result : JWST data model - Reference file model without NaNs in data array. + factor: float + Scaling factor by which to multiply background image before subtraction """ - - mask = np.isnan(model.data) - if mask.sum(dtype=np.intp) == 0: - return model - else: - temp = model.copy() - temp.data[mask] = fill_value - return temp + # apply the mask. we don't care about the input shape here + sci = sci[~mask] + bkg = bkg[~mask] + var = var[~mask] + + print("sci", sci[~sci.mask].shape, np.nanmean(sci), np.sum(np.isnan(sci))) + print("bkg", bkg[~bkg.mask].shape, np.nanmean(bkg), np.sum(np.isnan(bkg))) + print("var", var[~var.mask].shape, np.nanmean(var), np.sum(np.isnan(var))) + sci = _apply_percentiles(sci, p_lo, p_hi) + bkg = _apply_percentiles(bkg, p_lo, p_hi) + print("sci", sci[~sci.mask].shape, np.nanmean(sci), np.sum(np.isnan(sci))) + print("bkg", bkg[~bkg.mask].shape, np.nanmean(bkg), np.sum(np.isnan(bkg))) + print("var", var[~var.mask].shape, np.nanmean(var), np.sum(np.isnan(var))) + return np.nansum(sci*bkg/var) / np.nansum(bkg*bkg/var) def mask_from_source_cat(input_model, wl_range_name, mmag_extract=None): @@ -428,36 +436,3 @@ def mask_from_source_cat(input_model, wl_range_name, mmag_extract=None): bkg_mask[..., ymin:ymax, xmin:xmax] = False return bkg_mask - - -def robust_mean(x, lowlim=25., highlim=75.): - """Compute a mean value, excluding outliers. - - Parameters - ---------- - x : ndarray - The array for which we want a mean value. - - lowlim : float - The lower `lowlim` percent of the data will not be used when - computing the mean. - - highlim : float - The upper `highlim` percent of the data will not be used when - computing the mean. - - Returns - ------- - mean_value : float - The mean of `x`, excluding data outside `lowlim` to `highlim` - percentile limits. - """ - - nan_mask = np.isnan(x) - cleaned_x = x[~nan_mask] - limits = np.percentile(cleaned_x, (lowlim, highlim)) - mask = np.logical_and(cleaned_x >= limits[0], cleaned_x <= limits[1]) - - mean_value = np.mean(cleaned_x[mask], dtype=float) - - return mean_value diff --git a/jwst/background/tests/test_background.py b/jwst/background/tests/test_background.py index 00b04c93a8..ca2bfc0da9 100644 --- a/jwst/background/tests/test_background.py +++ b/jwst/background/tests/test_background.py @@ -14,9 +14,11 @@ from jwst.assign_wcs import AssignWcsStep from jwst.background import BackgroundStep from jwst.stpipe import Step -from jwst.background.background_sub import (robust_mean, mask_from_source_cat, - no_NaN, sufficient_background_pixels) +from jwst.background.background_sub import (subtract_wfss_bkg, + mask_from_source_cat, + sufficient_background_pixels) +UNIFORM_BKG = 0.123 @pytest.fixture(scope="module") def data_path(): @@ -239,7 +241,19 @@ def make_wfss_datamodel(data_path): image.meta.observation._instance.update(observation) image.meta.subarray._instance.update(subarray) image.meta.exposure._instance.update(exposure) - image.data = np.random.rand(2048, 2048) + + # make random data and error arrays, add NaNs to ensure NaN handling works properly + rng = np.random.default_rng(seed=42) + image.data = rng.random((2048, 2048)) + image.err = 0.1*rng.random((2048, 2048)) + num_nans = 123 + nan_indices = np.unravel_index(rng.choice(image.data.size, num_nans), image.data.shape) + image.data[nan_indices] = np.nan + image.err[nan_indices] = np.nan + + # also add a small background to the data to see if it will get removed + image.data += UNIFORM_BKG + image.meta.source_catalog = str(data_path / "test_cat.ecsv") return image @@ -308,20 +322,10 @@ def test_nis_wfss_background(filters, pupils, make_wfss_datamodel): wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") bkg_file = Step().get_reference_file(wcs_corrected, 'wfssbkg') - mask = mask_from_source_cat(wcs_corrected, wavelenrange) + result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - with datamodels.open(bkg_file) as bkg_ref: - bkg_ref = no_NaN(bkg_ref) + assert np.isclose(np.nanmean(result.data), 0.0) - # calculate backgrounds - pipeline_data_mean = robust_mean(wcs_corrected.data[mask]) - test_data_mean, _, _ = sigma_clipped_stats(wcs_corrected.data, sigma=2) - - pipeline_reference_mean = robust_mean(bkg_ref.data[mask]) - test_reference_mean, _, _ = sigma_clipped_stats(bkg_ref.data, sigma=2) - - assert np.isclose([pipeline_data_mean], [test_data_mean], rtol=1e-3) - assert np.isclose([pipeline_reference_mean], [test_reference_mean], rtol=1e-1) @pytest.mark.parametrize('data_shape,background_shape', @@ -373,40 +377,6 @@ def test_miri_subarray_partial_overlap(data_shape, background_shape): background.close() -def test_robust_mean(): - """Test robust mean calculation""" - data = np.random.rand(2048, 2048) - result = robust_mean(data) - test = np.mean(data) - - assert np.isclose([test], [result], rtol=1e-3) - - -def test_no_nan(): - """Make sure that nan values are filled with fill value""" - # Make data model - model = datamodels.ImageModel() - data = np.random.rand(10, 10) - - # Randomly insert NaNs - data.ravel()[np.random.choice(data.size, 10, replace=False)] = np.nan - model.data = data - - # Randomly select fill value - fill_val = np.random.randint(0, 20) - - # Call no_NaN - result = no_NaN(model, fill_value=fill_val) - - # Use np.nan to find NaNs. - test_result = np.isnan(model.data) - # Assign fill values to NaN indices - model.data[test_result] = fill_val - - # Make sure arrays are equal. - assert np.array_equal(model.data, result.data) - - def test_sufficient_background_pixels(): model = datamodels.ImageModel(data=np.zeros((2048, 2048)), dq=np.zeros((2048, 2048))) From 41a7ab9d3d72c032a625fe6a2326920f1fdd2069 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 3 Dec 2024 11:33:15 -0500 Subject: [PATCH 02/19] Fix sigma-clipping behavior and apply mask properly --- jwst/background/background_sub.py | 33 +++++++----------------- jwst/background/tests/test_background.py | 33 ++++++++++++++---------- 2 files changed, 30 insertions(+), 36 deletions(-) diff --git a/jwst/background/background_sub.py b/jwst/background/background_sub.py index 1a991154e4..878f05bc15 100755 --- a/jwst/background/background_sub.py +++ b/jwst/background/background_sub.py @@ -10,7 +10,6 @@ from ..assign_wcs.util import create_grism_bbox from astropy.stats import sigma_clip from astropy.utils.exceptions import AstropyUserWarning -from scipy.stats import norm import logging @@ -330,7 +329,11 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non bkg_mask = np.ones(input_model.data.shape, dtype=bool) # compute scaling factor for the reference background image - factor = _err_weighted_mean(input_model.data, input_model.err, bkg_ref.data, ~bkg_mask) + factor = _clipped_err_weighted_mean( + input_model.data[bkg_mask], + input_model.err[bkg_mask], + bkg_ref.data[bkg_mask], + ) result = input_model.copy() subtract_this = factor * bkg_ref.data result.data = input_model.data - subtract_this @@ -342,19 +345,11 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non return result -def _apply_percentiles(data, lo, hi): - limits = np.nanpercentile(data, (lo, hi)) - print(limits) - mask = np.logical_and(data < limits[0], data > limits[1]) - return data[~mask] - - - -def _err_weighted_mean(sci, var, bkg, mask, p_lo=25., p_hi=75.): +def _clipped_err_weighted_mean(sci, var, bkg, p_lo=25., p_hi=75.): """ Compute scaling factor by which to multiply background image before subtraction. It is computed as - scale_factor = np.sum(sci*bkg/var) / np.sum(sci*bkg/var) + scale_factor = sum(sci*bkg/var) / sum(bkg*bkg/var) where sci and bkg have both been sigma-clipped according to p_lo and p_hi Parameters @@ -365,8 +360,6 @@ def _err_weighted_mean(sci, var, bkg, mask, p_lo=25., p_hi=75.): The variance array bkg: ndarray, required The reference model background array - mask: ndarray, required - Mask to remove sources from the data. Should be 1 where BAD p_lo: float, optional Lower percentile for sigma clipping p_hi: float, optional @@ -377,19 +370,13 @@ def _err_weighted_mean(sci, var, bkg, mask, p_lo=25., p_hi=75.): factor: float Scaling factor by which to multiply background image before subtraction """ - # apply the mask. we don't care about the input shape here + # find sigma-clipping mask from the data + limits = np.nanpercentile(sci, (p_lo, p_hi)) + mask = np.logical_and(sci < limits[0], sci > limits[1]) #true where BAD sci = sci[~mask] bkg = bkg[~mask] var = var[~mask] - print("sci", sci[~sci.mask].shape, np.nanmean(sci), np.sum(np.isnan(sci))) - print("bkg", bkg[~bkg.mask].shape, np.nanmean(bkg), np.sum(np.isnan(bkg))) - print("var", var[~var.mask].shape, np.nanmean(var), np.sum(np.isnan(var))) - sci = _apply_percentiles(sci, p_lo, p_hi) - bkg = _apply_percentiles(bkg, p_lo, p_hi) - print("sci", sci[~sci.mask].shape, np.nanmean(sci), np.sum(np.isnan(sci))) - print("bkg", bkg[~bkg.mask].shape, np.nanmean(bkg), np.sum(np.isnan(bkg))) - print("var", var[~var.mask].shape, np.nanmean(var), np.sum(np.isnan(var))) return np.nansum(sci*bkg/var) / np.nansum(bkg*bkg/var) diff --git a/jwst/background/tests/test_background.py b/jwst/background/tests/test_background.py index ca2bfc0da9..1807cee469 100644 --- a/jwst/background/tests/test_background.py +++ b/jwst/background/tests/test_background.py @@ -288,20 +288,18 @@ def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_data wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") bkg_file = Step().get_reference_file(wcs_corrected, 'wfssbkg') - mask = mask_from_source_cat(wcs_corrected, wavelenrange) - - with datamodels.open(bkg_file) as bkg_ref: - bkg_ref = no_NaN(bkg_ref) - - # calculate backgrounds - pipeline_data_mean = robust_mean(wcs_corrected.data[mask]) - test_data_mean, _, _ = sigma_clipped_stats(wcs_corrected.data, sigma=2) + result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - pipeline_reference_mean = robust_mean(bkg_ref.data[mask]) - test_reference_mean, _, _ = sigma_clipped_stats(bkg_ref.data, sigma=2) + # re-mask data to check background level + mask = mask_from_source_cat(result, wavelenrange) + sci_bkg_out = result.data[mask] - assert np.isclose([pipeline_data_mean], [test_data_mean], rtol=1e-3) - assert np.isclose([pipeline_reference_mean], [test_reference_mean], rtol=1e-1) + # check that background is zero to within some fraction of the data stdev + # since we have a uniform bkg for this test but the reference file takes into account + # lots of imperfections, the subtraction doesn't actually look that great here. + # Should be better for real data + tol = 0.2*np.nanstd(sci_bkg_out) + assert np.isclose(np.nanmean(sci_bkg_out), 0.0, atol=tol) @pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) @@ -324,7 +322,16 @@ def test_nis_wfss_background(filters, pupils, make_wfss_datamodel): result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - assert np.isclose(np.nanmean(result.data), 0.0) + # re-mask data to check background level + mask = mask_from_source_cat(result, wavelenrange) + sci_bkg_out = result.data[mask] + + # check that background is zero to within some fraction of the data stdev + # since we have a uniform bkg for this test but the reference file takes into account + # lots of imperfections, the subtraction doesn't actually look that great here. + # Should be better for real data + tol = 0.2*np.nanstd(sci_bkg_out) + assert np.isclose(np.nanmean(sci_bkg_out), 0.0, atol=tol) From a858eb2eafc47621a701757912f111f671850e03 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Tue, 3 Dec 2024 12:58:25 -0500 Subject: [PATCH 03/19] relax tolerance of test --- changes/8990.background.rst | 1 + jwst/background/tests/test_background.py | 31 ++++++++++++------------ 2 files changed, 17 insertions(+), 15 deletions(-) create mode 100644 changes/8990.background.rst diff --git a/changes/8990.background.rst b/changes/8990.background.rst new file mode 100644 index 0000000000..ed656009a4 --- /dev/null +++ b/changes/8990.background.rst @@ -0,0 +1 @@ +Compute scaling for WFSS background subtraction using error-weighted mean diff --git a/jwst/background/tests/test_background.py b/jwst/background/tests/test_background.py index 1807cee469..7fe26d27ed 100644 --- a/jwst/background/tests/test_background.py +++ b/jwst/background/tests/test_background.py @@ -3,7 +3,6 @@ """ import pathlib -from astropy.stats import sigma_clipped_stats import pytest import numpy as np from numpy.testing import assert_allclose @@ -290,16 +289,17 @@ def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_data result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - # re-mask data to check background level - mask = mask_from_source_cat(result, wavelenrange) - sci_bkg_out = result.data[mask] - # check that background is zero to within some fraction of the data stdev # since we have a uniform bkg for this test but the reference file takes into account # lots of imperfections, the subtraction doesn't actually look that great here. - # Should be better for real data - tol = 0.2*np.nanstd(sci_bkg_out) - assert np.isclose(np.nanmean(sci_bkg_out), 0.0, atol=tol) + # TODO: refactor this test to use a fake background instead of reference file background + + # re-mask data so "real" sources are ignored here + mask = mask_from_source_cat(result, wavelenrange) + data = result.data[mask] + + tol = 0.5*np.nanstd(data) + assert np.isclose(np.nanmean(data), 0.0, atol=tol) @pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) @@ -322,16 +322,17 @@ def test_nis_wfss_background(filters, pupils, make_wfss_datamodel): result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - # re-mask data to check background level - mask = mask_from_source_cat(result, wavelenrange) - sci_bkg_out = result.data[mask] - # check that background is zero to within some fraction of the data stdev # since we have a uniform bkg for this test but the reference file takes into account # lots of imperfections, the subtraction doesn't actually look that great here. - # Should be better for real data - tol = 0.2*np.nanstd(sci_bkg_out) - assert np.isclose(np.nanmean(sci_bkg_out), 0.0, atol=tol) + # TODO: refactor this test to use a fake background instead of reference file background + + # re-mask data so "real" sources are ignored here + mask = mask_from_source_cat(result, wavelenrange) + data = result.data[mask] + + tol = 0.5*np.nanstd(data) + assert np.isclose(np.nanmean(data), 0.0, atol=tol) From b8d3d0fe850e05806b9e6c8eb5c822f9796fafa9 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 4 Dec 2024 09:48:47 -0500 Subject: [PATCH 04/19] apply sigma clipping on sci/bkg ratio --- jwst/background/background_sub.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/jwst/background/background_sub.py b/jwst/background/background_sub.py index 878f05bc15..07250a108d 100755 --- a/jwst/background/background_sub.py +++ b/jwst/background/background_sub.py @@ -345,12 +345,12 @@ def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=Non return result -def _clipped_err_weighted_mean(sci, var, bkg, p_lo=25., p_hi=75.): +def _clipped_err_weighted_mean(sci, var, bkg, p_lo=1., p_hi=99.): """ Compute scaling factor by which to multiply background image before subtraction. - It is computed as + Sigma-clip the (sci/bkg) ratio according to p_lo and p_hi to determine a clipping mask, + apply that mask to the data, background, and variance, then compute scale_factor = sum(sci*bkg/var) / sum(bkg*bkg/var) - where sci and bkg have both been sigma-clipped according to p_lo and p_hi Parameters ---------- @@ -370,9 +370,9 @@ def _clipped_err_weighted_mean(sci, var, bkg, p_lo=25., p_hi=75.): factor: float Scaling factor by which to multiply background image before subtraction """ - # find sigma-clipping mask from the data - limits = np.nanpercentile(sci, (p_lo, p_hi)) - mask = np.logical_and(sci < limits[0], sci > limits[1]) #true where BAD + ratio = sci/bkg + limits = np.nanpercentile(ratio, (p_lo, p_hi)) + mask = np.logical_or(ratio < limits[0], ratio > limits[1]) #true where BAD sci = sci[~mask] bkg = bkg[~mask] var = var[~mask] From f85926e145a2a82c7bb6bcb83c7ec068e0042a62 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 4 Dec 2024 09:58:25 -0500 Subject: [PATCH 05/19] doc changes --- docs/jwst/background_step/description.rst | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst index 183c8e1951..cc8162f681 100644 --- a/docs/jwst/background_step/description.rst +++ b/docs/jwst/background_step/description.rst @@ -87,7 +87,10 @@ For Wide-Field Slitless Spectroscopy expsoures (NIS_WFSS and NRC_WFSS), a background reference image is subtracted from the target exposure. Before being subtracted, the background reference image is scaled to match the signal level of the WFSS image within background (source-free) regions of the -image. +image. The scaling factor is determined based on the variance-weighted mean +of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``. +Prior to this computation, the data are sigma-clipped according to the ratio +``sci/bkg``, rejecting pixels where that ratio is in the 1st or 99th percentile. The locations of source spectra are determined from a source catalog (specified by the primary header keyword SCATFILE), in conjunction with a reference file @@ -99,15 +102,6 @@ abmag of the source catalog objects used to define the background regions. The default is to use all source catalog entries that result in a spectrum falling within the WFSS image. -Robust mean values are obtained for the background regions in the WFSS image and -for the same regions in the background reference image, and the ratio of those two -mean values is used to scale the background reference image. The robust mean is -computed by excluding the lowest 25% and highest 25% of the data (using the -numpy.percentile function), and taking a simple arithmetic mean of the -remaining values. Note that NaN values (if any) in the background -reference image are currently set to zero. If there are a lot of NaNs, -it may be that more than 25% of the lowest values will need to be excluded. - For both background methods the output results are always returned in a new data model, leaving the original input model unchanged. From a0661e0b93599eb11f015f4951fbbe91e40ec361 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 5 Dec 2024 17:56:12 -0500 Subject: [PATCH 06/19] Split wfss into its own files, implemented iterative bkg sub for wfss --- jwst/background/background_step.py | 4 +- jwst/background/background_sub.py | 156 ---------- jwst/background/background_sub_wfss.py | 241 ++++++++++++++++ jwst/background/tests/conftest.py | 7 + jwst/background/tests/test_background.py | 183 +----------- jwst/background/tests/test_background_wfss.py | 269 ++++++++++++++++++ 6 files changed, 520 insertions(+), 340 deletions(-) create mode 100644 jwst/background/background_sub_wfss.py create mode 100644 jwst/background/tests/conftest.py create mode 100644 jwst/background/tests/test_background_wfss.py diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index 4f5946195d..b7325c80bd 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -2,7 +2,7 @@ from stdatamodels.jwst import datamodels from ..stpipe import Step -from . import background_sub +from . import background_sub, background_sub_wfss import numpy as np __all__ = ["BackgroundStep"] @@ -60,7 +60,7 @@ def process(self, input, bkg_list): wlrange_name) # Do the background subtraction for WFSS/GRISM data - result = background_sub.subtract_wfss_bkg( + result = background_sub_wfss.subtract_wfss_bkg( input_model, bkg_name, wlrange_name, self.wfss_mmag_extract) if result is None: result = input_model diff --git a/jwst/background/background_sub.py b/jwst/background/background_sub.py index 07250a108d..a04a03ea9c 100755 --- a/jwst/background/background_sub.py +++ b/jwst/background/background_sub.py @@ -1,13 +1,10 @@ import copy -import math import numpy as np import warnings from stdatamodels.jwst import datamodels -from stdatamodels.jwst.datamodels.dqflags import pixel from . import subtract_images -from ..assign_wcs.util import create_grism_bbox from astropy.stats import sigma_clip from astropy.utils.exceptions import AstropyUserWarning @@ -270,156 +267,3 @@ def average_background(input_model, bkg_list, sigma, maxiters): avg_bkg.err = (np.sqrt(merr.sum(axis=0)) / (num_bkg - merr.mask.sum(axis=0))).data return avg_bkg - - -def sufficient_background_pixels(dq_array, bkg_mask, min_pixels=100): - """Count number of good pixels for background use. - - Check DQ flags of pixels selected for bkg use - XOR the DQ values with - the DO_NOT_USE flag to flip the DO_NOT_USE bit. Then count the number - of pixels that AND with the DO_NOT_USE flag, i.e. initially did not have - the DO_NOT_USE bit set. - """ - return np.count_nonzero((dq_array[bkg_mask] - ^ pixel['DO_NOT_USE']) - & pixel['DO_NOT_USE'] - ) > min_pixels - - -def subtract_wfss_bkg(input_model, bkg_filename, wl_range_name, mmag_extract=None): - """Scale and subtract a background reference image from WFSS/GRISM data. - - Parameters - ---------- - input_model : JWST data model - input target exposure data model - - bkg_filename : str - name of master background file for WFSS/GRISM - - wl_range_name : str - name of wavelengthrange reference file - - mmag_extract : float - minimum abmag of grism objects to extract - - Returns - ------- - result : JWST data model - background-subtracted target data model - """ - - bkg_ref = datamodels.open(bkg_filename) - - if hasattr(input_model.meta, "source_catalog"): - got_catalog = True - else: - log.warning("No source_catalog found in input.meta.") - got_catalog = False - - # Create a mask from the source catalog, True where there are no sources, - # i.e. in regions we can use as background. - if got_catalog: - bkg_mask = mask_from_source_cat(input_model, wl_range_name, mmag_extract) - if not sufficient_background_pixels(input_model.dq, bkg_mask): - log.warning("Not enough background pixels to work with.") - log.warning("Step will be SKIPPED.") - return None - else: - bkg_mask = np.ones(input_model.data.shape, dtype=bool) - - # compute scaling factor for the reference background image - factor = _clipped_err_weighted_mean( - input_model.data[bkg_mask], - input_model.err[bkg_mask], - bkg_ref.data[bkg_mask], - ) - result = input_model.copy() - subtract_this = factor * bkg_ref.data - result.data = input_model.data - subtract_this - log.info(f"Average of background image subtracted = {subtract_this.mean(dtype=float)}") - result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) - - bkg_ref.close() - - return result - - -def _clipped_err_weighted_mean(sci, var, bkg, p_lo=1., p_hi=99.): - """ - Compute scaling factor by which to multiply background image before subtraction. - Sigma-clip the (sci/bkg) ratio according to p_lo and p_hi to determine a clipping mask, - apply that mask to the data, background, and variance, then compute - scale_factor = sum(sci*bkg/var) / sum(bkg*bkg/var) - - Parameters - ---------- - sci: ndarray, required - The science array - var: ndarray, required - The variance array - bkg: ndarray, required - The reference model background array - p_lo: float, optional - Lower percentile for sigma clipping - p_hi: float, optional - Upper percentile for sigma clipping - - Returns - ------- - factor: float - Scaling factor by which to multiply background image before subtraction - """ - ratio = sci/bkg - limits = np.nanpercentile(ratio, (p_lo, p_hi)) - mask = np.logical_or(ratio < limits[0], ratio > limits[1]) #true where BAD - sci = sci[~mask] - bkg = bkg[~mask] - var = var[~mask] - - return np.nansum(sci*bkg/var) / np.nansum(bkg*bkg/var) - - -def mask_from_source_cat(input_model, wl_range_name, mmag_extract=None): - """Create a mask that is False within bounding boxes of sources. - - Parameters - ---------- - input_model : JWST data model - input target exposure data model - - wl_range_name : str - Name of the wavelengthrange reference file - - mmag_extract : float - Minimum abmag of grism objects to extract - - Returns - ------- - bkg_mask : ndarray - Boolean mask: True for background, False for pixels that are - inside at least one of the source regions defined in the source - catalog. - """ - - shape = input_model.data.shape - bkg_mask = np.ones(shape, dtype=bool) - - reference_files = {"wavelengthrange": wl_range_name} - grism_obj_list = create_grism_bbox(input_model, reference_files, mmag_extract) - - for obj in grism_obj_list: - order_bounding = obj.order_bounding - for order in order_bounding.keys(): - ((ymin, ymax), (xmin, xmax)) = order_bounding[order] - xmin = int(math.floor(xmin)) - xmax = int(math.ceil(xmax)) + 1 # convert to slice limit - ymin = int(math.floor(ymin)) - ymax = int(math.ceil(ymax)) + 1 - xmin = max(xmin, 0) - xmax = min(xmax, shape[-1]) - ymin = max(ymin, 0) - ymax = min(ymax, shape[-2]) - bkg_mask[..., ymin:ymax, xmin:xmax] = False - - return bkg_mask diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py new file mode 100644 index 0000000000..008fbc1912 --- /dev/null +++ b/jwst/background/background_sub_wfss.py @@ -0,0 +1,241 @@ +import math +import numpy as np + +from stdatamodels.jwst import datamodels +from stdatamodels.jwst.datamodels.dqflags import pixel + +from ..assign_wcs.util import create_grism_bbox + +import logging +log = logging.getLogger(__name__) +log.setLevel(logging.DEBUG) + + +def subtract_wfss_bkg( + input_model, + bkg_filename, + wl_range_name, + mmag_extract=None, + rescaler_kwargs: dict = {}, +): + """Scale and subtract a background reference image from WFSS/GRISM data. + + Parameters + ---------- + input_model : JWST data model + input target exposure data model + + bkg_filename : str + name of master background file for WFSS/GRISM + + wl_range_name : str + name of wavelengthrange reference file + + mmag_extract : float, optional, default None + minimum abmag of grism objects to extract + + rescaler_kwargs : dict, optional, default {} + Keyword arguments to pass to ScalingFactorComputer + + Returns + ------- + result : JWST data model + background-subtracted target data model + """ + + bkg_ref = datamodels.open(bkg_filename) + + if hasattr(input_model.meta, "source_catalog"): + got_catalog = True + else: + log.warning("No source_catalog found in input.meta.") + got_catalog = False + + # Create a mask from the source catalog, True where there are no sources, + # i.e. in regions we can use as background. + if got_catalog: + bkg_mask = _mask_from_source_cat(input_model, wl_range_name, mmag_extract) + if not _sufficient_background_pixels(input_model.dq, bkg_mask): + log.warning("Not enough background pixels to work with.") + log.warning("Step will be SKIPPED.") + return None + else: + bkg_mask = np.ones(input_model.data.shape, dtype=bool) + + # compute scaling factor for the reference background image + rescaler = _ScalingFactorComputer(**rescaler_kwargs) + factor, _ = rescaler(input_model.data.copy(), + bkg_ref.data.copy(), + input_model.err.copy()**2, + mask=~bkg_mask) + + # extract the derived factor and apply it to the unmasked, non-outlier-rejected data + subtract_this = factor * bkg_ref.data + + result = input_model.copy() + result.data = input_model.data - subtract_this + result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) + + log.info(f"Average of background image subtracted = {np.nanmean(subtract_this):.5f}") + + bkg_ref.close() + + return result + + +class _ScalingFactorComputer: + + def __init__(self, p=1.0, maxiter=5, rms_thresh=None, dispersion_axis=None): + """ + Parameters + ---------- + p: float, optional + Percentile for sigma clipping on both low and high ends, default 1.0. + For example, with p=2.0, the middle 96% of the data is kept. + maxiter: int, optional + Maximum number of iterations for outlier rejection. Default 5. + rms_thresh: float, optional + Stopping criterion for outlier rejection; stops when the rms residuals + are less than this threshold. Default None, i.e., ignore this + and only stop at maxiter. + dispersion_axis: int, optional + The index to select the along-dispersion axis. Used to compute the RMS + residual, so must be set if rms_thresh > 0. Default None. + """ + if rms_thresh is None: + rms_thresh = -1 + if (rms_thresh > 0) and (dispersion_axis not in [0,1]): + msg = (f"Unrecognized dispersion axis {dispersion_axis}. " + "Dispersion axis must be specified if rms_thresh " + "is used as a stopping criterion.") + raise ValueError(msg) + + self.p = p + self.maxiter = maxiter + self.rms_thresh = rms_thresh + self.dispersion_axis = dispersion_axis + + + def __call__(self, sci, bkg, var, mask=None): + """ + Parameters + ---------- + sci: ndarray + The science data. + bkg: ndarray + The reference background model. + var: ndarray + Total variance (error squared) of the science data. + mask: ndarray[bool], optional + Initial mask to be applied to the data, True where bad. + Typically this would mask out the real sources in the data. + + Returns + ------- + float + Scaling factor that minimizes sci - factor*bkg, + taking into account residuals and outliers. + ndarray[bool] + Outlier mask generated by the iterative clipping procedure. + """ + if mask is None: + mask = np.zeros(sci.shape, dtype="bool") + self._update_nans(sci, bkg, var, mask) + + # iteratively reject more and more outliers + rms_resid = np.inf + i = 0 + while (i < self.maxiter) and (rms_resid > self.rms_thresh): + + # Reject outliers based on residual between sci and bkg. + # Updating the sci, var, and bkg nan values + # means they are ignored by nanpercentile in the next iteration + factor = self.err_weighted_mean(sci, bkg, var) + sci_sub = sci-factor*bkg + limits = np.nanpercentile(sci_sub, (self.p, 100-self.p)) + mask += np.logical_or(sci_sub < limits[0], sci_sub > limits[1]) + self._update_nans(sci, bkg, var, mask) + + # update stop conditions + if (self.rms_thresh > 0): + rms_resid = self._compute_rms_residual(sci_sub) + i += 1 + + return self.err_weighted_mean(sci, bkg, var), mask + + + def err_weighted_mean(self, sci, bkg, var): + return np.nansum(sci*bkg/var) / np.nansum(bkg*bkg/var) + + + def _update_nans(self, sci, bkg, var, mask): + sci[mask] = np.nan + bkg[mask] = np.nan + var[mask] = np.nan + + + def _compute_rms_residual(self, sci_sub): + """Calculate the RMS of the cross-dispersion background, which is found by taking + the median along the dispersion axis""" + sci_sub_profile = np.nanmedian(sci_sub, axis=self.dispersion_axis) + return np.sqrt(np.nanmean(sci_sub_profile**2)) + + + +def _sufficient_background_pixels(dq_array, bkg_mask, min_pixels=100): + """Count number of good pixels for background use. + + Check DQ flags of pixels selected for bkg use - XOR the DQ values with + the DO_NOT_USE flag to flip the DO_NOT_USE bit. Then count the number + of pixels that AND with the DO_NOT_USE flag, i.e. initially did not have + the DO_NOT_USE bit set. + """ + return np.count_nonzero((dq_array[bkg_mask] + ^ pixel['DO_NOT_USE']) + & pixel['DO_NOT_USE'] + ) > min_pixels + + +def _mask_from_source_cat(input_model, wl_range_name, mmag_extract=None): + """Create a mask that is False within bounding boxes of sources. + + Parameters + ---------- + input_model : JWST data model + input target exposure data model + + wl_range_name : str + Name of the wavelengthrange reference file + + mmag_extract : float + Minimum abmag of grism objects to extract + + Returns + ------- + bkg_mask : ndarray + Boolean mask: True for background, False for pixels that are + inside at least one of the source regions defined in the source + catalog. + """ + + shape = input_model.data.shape + bkg_mask = np.ones(shape, dtype=bool) + + reference_files = {"wavelengthrange": wl_range_name} + grism_obj_list = create_grism_bbox(input_model, reference_files, mmag_extract) + + for obj in grism_obj_list: + order_bounding = obj.order_bounding + for order in order_bounding.keys(): + ((ymin, ymax), (xmin, xmax)) = order_bounding[order] + xmin = int(math.floor(xmin)) + xmax = int(math.ceil(xmax)) + 1 # convert to slice limit + ymin = int(math.floor(ymin)) + ymax = int(math.ceil(ymax)) + 1 + xmin = max(xmin, 0) + xmax = min(xmax, shape[-1]) + ymin = max(ymin, 0) + ymax = min(ymax, shape[-2]) + bkg_mask[..., ymin:ymax, xmin:xmax] = False + + return bkg_mask diff --git a/jwst/background/tests/conftest.py b/jwst/background/tests/conftest.py new file mode 100644 index 0000000000..cfe5d98314 --- /dev/null +++ b/jwst/background/tests/conftest.py @@ -0,0 +1,7 @@ +import pytest +import pathlib + + +@pytest.fixture(scope="module") +def data_path(): + return pathlib.Path(__file__).parent / "data" \ No newline at end of file diff --git a/jwst/background/tests/test_background.py b/jwst/background/tests/test_background.py index 7fe26d27ed..841aa3a362 100644 --- a/jwst/background/tests/test_background.py +++ b/jwst/background/tests/test_background.py @@ -1,32 +1,16 @@ """ Unit tests for background subtraction """ -import pathlib - import pytest -import numpy as np from numpy.testing import assert_allclose from stdatamodels.jwst import datamodels -from stdatamodels.jwst.datamodels.dqflags import pixel - -from jwst.assign_wcs import AssignWcsStep from jwst.background import BackgroundStep -from jwst.stpipe import Step -from jwst.background.background_sub import (subtract_wfss_bkg, - mask_from_source_cat, - sufficient_background_pixels) - -UNIFORM_BKG = 0.123 - -@pytest.fixture(scope="module") -def data_path(): - return pathlib.Path(__file__).parent / "data" @pytest.fixture(scope='module') def background(tmp_path_factory): - """Generate a background image to feed to background step""" + """Generate a background image to feed to background step""" filename = tmp_path_factory.mktemp('background_input') filename = filename / 'background.fits' @@ -191,151 +175,6 @@ def test_nirspec_gwa_ytilt(tmp_cwd, background, science_image): back_image.close() -@pytest.fixture(scope='module') -def make_wfss_datamodel(data_path): - """Generate WFSS Observation""" - wcsinfo = { - 'dec_ref': -27.79156387419731, - 'ra_ref': 53.16247756038121, - 'roll_ref': 0.04254766236781744, - 'v2_ref': -290.1, - 'v3_ref': -697.5, - 'v3yangle': 0.56987, - 'vparity': -1} - - observation = { - 'date': '2023-01-05', - 'time': '8:59:37'} - - exposure = { - 'duration': 11.805952, - 'end_time': 58119.85416, - 'exposure_time': 11.776, - 'frame_time': 0.11776, - 'group_time': 0.11776, - 'groupgap': 0, - 'integration_time': 11.776, - 'nframes': 1, - 'ngroups': 8, - 'nints': 1, - 'nresets_between_ints': 0, - 'nsamples': 1, - 'sample_time': 10.0, - 'start_time': 58668.72509857639, - 'zero_frame': False} - - subarray = {'xsize': 2048, - 'ysize': 2048, - 'xstart': 1, - 'ystart': 1} - - instrument = { - 'filter_position': 1, - 'pupil_position': 1} - - image = datamodels.ImageModel((2048, 2048)) - - image.meta.wcsinfo._instance.update(wcsinfo) - image.meta.instrument._instance.update(instrument) - image.meta.observation._instance.update(observation) - image.meta.subarray._instance.update(subarray) - image.meta.exposure._instance.update(exposure) - - # make random data and error arrays, add NaNs to ensure NaN handling works properly - rng = np.random.default_rng(seed=42) - image.data = rng.random((2048, 2048)) - image.err = 0.1*rng.random((2048, 2048)) - num_nans = 123 - nan_indices = np.unravel_index(rng.choice(image.data.size, num_nans), image.data.shape) - image.data[nan_indices] = np.nan - image.err[nan_indices] = np.nan - - # also add a small background to the data to see if it will get removed - image.data += UNIFORM_BKG - - image.meta.source_catalog = str(data_path / "test_cat.ecsv") - - return image - - -filter_list = ['F250M', 'F277W', 'F335M', 'F356W', 'F460M', - 'F356W', 'F410M', 'F430M', 'F444W'] # + ['F480M', 'F322W2', 'F300M'] - - -@pytest.mark.parametrize("pupils", ['GRISMC', 'GRISMR']) -@pytest.mark.parametrize("filters", filter_list) -@pytest.mark.parametrize("detectors", ['NRCALONG', 'NRCBLONG']) -def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_datamodel): - """Test background subtraction for NIRCAM WFSS modes.""" - data = make_wfss_datamodel - - data.meta.instrument.filter = filters - data.meta.instrument.pupil = pupils - data.meta.instrument.detector = detectors - data.meta.instrument.channel = 'LONG' - data.meta.instrument.name = 'NIRCAM' - data.meta.exposure.type = 'NRC_WFSS' - - if data.meta.instrument.detector == 'NRCALONG': - data.meta.instrument.module = 'A' - elif data.meta.instrument.detector == 'NRCBLONG': - data.meta.instrument.module = 'B' - - wcs_corrected = AssignWcsStep.call(data) - - # Get References - wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") - bkg_file = Step().get_reference_file(wcs_corrected, 'wfssbkg') - - result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - - # check that background is zero to within some fraction of the data stdev - # since we have a uniform bkg for this test but the reference file takes into account - # lots of imperfections, the subtraction doesn't actually look that great here. - # TODO: refactor this test to use a fake background instead of reference file background - - # re-mask data so "real" sources are ignored here - mask = mask_from_source_cat(result, wavelenrange) - data = result.data[mask] - - tol = 0.5*np.nanstd(data) - assert np.isclose(np.nanmean(data), 0.0, atol=tol) - - -@pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) -@pytest.mark.parametrize("pupils", ['F090W', 'F115W', 'F140M', 'F150W', 'F158M', 'F200W']) -def test_nis_wfss_background(filters, pupils, make_wfss_datamodel): - """Test background subtraction for NIRISS WFSS modes.""" - data = make_wfss_datamodel - - data.meta.instrument.filter = filters - data.meta.instrument.pupil = pupils - data.meta.instrument.detector = 'NIS' - data.meta.instrument.name = 'NIRISS' - data.meta.exposure.type = 'NIS_WFSS' - - wcs_corrected = AssignWcsStep.call(data) - - # Get References - wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") - bkg_file = Step().get_reference_file(wcs_corrected, 'wfssbkg') - - result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) - - # check that background is zero to within some fraction of the data stdev - # since we have a uniform bkg for this test but the reference file takes into account - # lots of imperfections, the subtraction doesn't actually look that great here. - # TODO: refactor this test to use a fake background instead of reference file background - - # re-mask data so "real" sources are ignored here - mask = mask_from_source_cat(result, wavelenrange) - data = result.data[mask] - - tol = 0.5*np.nanstd(data) - assert np.isclose(np.nanmean(data), 0.0, atol=tol) - - - @pytest.mark.parametrize('data_shape,background_shape', [((10, 10), (10, 10)), ((10, 10), (20, 20)), @@ -383,23 +222,3 @@ def test_miri_subarray_partial_overlap(data_shape, background_shape): image.close() background.close() - - -def test_sufficient_background_pixels(): - model = datamodels.ImageModel(data=np.zeros((2048, 2048)), - dq=np.zeros((2048, 2048))) - refpix_flags = pixel['DO_NOT_USE'] | pixel['REFERENCE_PIXEL'] - model.dq[:4, :] = refpix_flags - model.dq[-4:, :] = refpix_flags - model.dq[:, :4] = refpix_flags - model.dq[:, -4:] = refpix_flags - - bkg_mask = np.ones((2048, 2048), dtype=bool) - # With full array minux refpix available for bkg, should be sufficient - assert sufficient_background_pixels(model.dq, bkg_mask) - - bkg_mask[4: -4, :] = 0 - bkg_mask[:, 4: -4] = 0 - # Now mask out entire array, mocking full source coverage of detector - - # no pixels should be available for bkg - assert not sufficient_background_pixels(model.dq, bkg_mask) diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py new file mode 100644 index 0000000000..4e8447f2d6 --- /dev/null +++ b/jwst/background/tests/test_background_wfss.py @@ -0,0 +1,269 @@ +import pytest +import numpy as np +from pathlib import Path + +from stdatamodels.jwst.datamodels.dqflags import pixel +from stdatamodels.jwst import datamodels +from jwst.stpipe import Step +from jwst.assign_wcs import AssignWcsStep +from jwst.background.background_sub_wfss import (subtract_wfss_bkg, + _mask_from_source_cat, + _sufficient_background_pixels, + _ScalingFactorComputer) + +BKG_SCALING = 0.123 +DETECTOR_SHAPE = (2048, 2048) +INITIAL_NAN_FRACTION = 1e-4 + +@pytest.fixture(scope="module") +def known_bkg(): + """Make a simplified version of the reference background model data.""" + + ny, nx = DETECTOR_SHAPE + y, x = np.mgrid[:ny, :nx] + gradient = x * y / (nx*ny) + gradient = gradient - np.mean(gradient) + return gradient + 1 + + +@pytest.fixture(scope="module") +def mock_data(known_bkg): + """Synthetic data with NaNs, noise, and the known background structure + but rescaled. Later tests will ensure we can retrieve the proper scaling.""" + + err_scaling = 0.05 + nan_fraction = INITIAL_NAN_FRACTION + + # make random data and error arrays + rng = np.random.default_rng(seed=42) + data = rng.normal(0, 1, DETECTOR_SHAPE) + # ensure all errors are positive and not too close to zero + err = err_scaling*(1 + rng.normal(0, 1, DETECTOR_SHAPE)**2) + + # add NaNs + num_nans = int(data.size * nan_fraction) + nan_indices = np.unravel_index(rng.choice(data.size, num_nans), data.shape) + data[nan_indices] = np.nan + err[nan_indices] = np.nan + original_data_mean = np.nanmean(data) + + # also add a small background to the data with same structure + # as the known reference background to see if it will get removed + data += known_bkg*BKG_SCALING + + return data, err, original_data_mean + + +@pytest.fixture(scope='module') +def make_wfss_datamodel(data_path, mock_data, known_bkg): + + """Generate WFSS Observation""" + wcsinfo = { + 'dec_ref': -27.79156387419731, + 'ra_ref': 53.16247756038121, + 'roll_ref': 0.04254766236781744, + 'v2_ref': -290.1, + 'v3_ref': -697.5, + 'v3yangle': 0.56987, + 'vparity': -1} + + observation = { + 'date': '2023-01-05', + 'time': '8:59:37'} + + exposure = { + 'duration': 11.805952, + 'end_time': 58119.85416, + 'exposure_time': 11.776, + 'frame_time': 0.11776, + 'group_time': 0.11776, + 'groupgap': 0, + 'integration_time': 11.776, + 'nframes': 1, + 'ngroups': 8, + 'nints': 1, + 'nresets_between_ints': 0, + 'nsamples': 1, + 'sample_time': 10.0, + 'start_time': 58668.72509857639, + 'zero_frame': False} + + subarray = {'xsize': DETECTOR_SHAPE[0], + 'ysize': DETECTOR_SHAPE[1], + 'xstart': 1, + 'ystart': 1} + + instrument = { + 'filter_position': 1, + 'pupil_position': 1} + + image = datamodels.ImageModel(DETECTOR_SHAPE) + + image.meta.wcsinfo._instance.update(wcsinfo) + image.meta.instrument._instance.update(instrument) + image.meta.observation._instance.update(observation) + image.meta.subarray._instance.update(subarray) + image.meta.exposure._instance.update(exposure) + + image.data = mock_data[0] + image.err = mock_data[1] + image.original_data_mean = mock_data[2] #just add this here for convenience + image.dq = np.isnan(image.data) + + image.meta.source_catalog = str(data_path / "test_cat.ecsv") + + return image + + +@pytest.fixture() +def bkg_file(tmp_cwd, make_wfss_datamodel, known_bkg): + """Mock background reference file""" + + bkg_fname = "ref_bkg.fits" + bkg_image = make_wfss_datamodel.copy() + bkg_image.data = known_bkg + bkg_image.save(tmp_cwd / Path(bkg_fname)) + + return bkg_fname + + +def test_make_wfss_ref_bkg(make_wfss_ref_bkg): + bkg_model = datamodels.open(make_wfss_ref_bkg) + bkg = bkg_model.data + import matplotlib.pyplot as plt + plt.imshow(bkg, origin='lower') + plt.show() + + +filter_list = ['F250M', 'F277W', 'F335M', 'F356W', 'F460M', + 'F356W', 'F410M', 'F430M', 'F444W'] # + ['F480M', 'F322W2', 'F300M'] + + +@pytest.mark.parametrize("pupils", ['GRISMC', 'GRISMR']) +@pytest.mark.parametrize("filters", filter_list) +@pytest.mark.parametrize("detectors", ['NRCALONG', 'NRCBLONG']) +def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_datamodel, bkg_file): + """Test background subtraction for NIRCAM WFSS modes.""" + data = make_wfss_datamodel + + data.meta.instrument.filter = filters + data.meta.instrument.pupil = pupils + data.meta.instrument.detector = detectors + data.meta.instrument.channel = 'LONG' + data.meta.instrument.name = 'NIRCAM' + data.meta.exposure.type = 'NRC_WFSS' + + if data.meta.instrument.detector == 'NRCALONG': + data.meta.instrument.module = 'A' + elif data.meta.instrument.detector == 'NRCBLONG': + data.meta.instrument.module = 'B' + + # Get References + wcs_corrected = AssignWcsStep.call(data) + wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") + + # do the subtraction + result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) + + # ensure NaN fraction did not increase. Rejecting outliers during determination + # of factor should not have carried over into result. + nan_frac = np.sum(np.isnan(result.data))/result.data.size + rtol = 1/(result.data.size*INITIAL_NAN_FRACTION) + assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=rtol) + + # re-mask data so "real" sources are ignored here + mask = _mask_from_source_cat(result, wavelenrange) + result.data[~mask] = np.nan + + # test that the background has been subtracted from the data to within some fraction of + # the noise in the data. There's probably a first-principles way to determine the tolerance, + # but this is ok for the purposes of this test. + tol = 0.01*np.nanstd(result.data) + assert np.isclose(np.nanmean(result.data), result.original_data_mean, atol=tol) + + +@pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) +@pytest.mark.parametrize("pupils", ['F090W', 'F115W', 'F140M', 'F150W', 'F158M', 'F200W']) +def test_nis_wfss_background(filters, pupils, make_wfss_datamodel, bkg_file): + """Test background subtraction for NIRISS WFSS modes.""" + data = make_wfss_datamodel + + data.meta.instrument.filter = filters + data.meta.instrument.pupil = pupils + data.meta.instrument.detector = 'NIS' + data.meta.instrument.name = 'NIRISS' + data.meta.exposure.type = 'NIS_WFSS' + + # Get References + wcs_corrected = AssignWcsStep.call(data) + wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") + + # do the subtraction + result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) + + # ensure NaN fraction did not increase. Rejecting outliers during determination + # of factor should not have carried over into result. + nan_frac = np.sum(np.isnan(result.data))/result.data.size + rtol = 1/(result.data.size*INITIAL_NAN_FRACTION) + assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=rtol) + + # re-mask data so "real" sources are ignored here + mask = _mask_from_source_cat(result, wavelenrange) + data = result.data[mask] + + tol = 0.01*np.nanstd(result.data) + assert np.isclose(np.nanmean(result.data), result.original_data_mean, atol=tol) + + +def test_sufficient_background_pixels(): + model = datamodels.ImageModel(data=np.zeros((2048, 2048)), + dq=np.zeros((2048, 2048))) + refpix_flags = pixel['DO_NOT_USE'] | pixel['REFERENCE_PIXEL'] + model.dq[:4, :] = refpix_flags + model.dq[-4:, :] = refpix_flags + model.dq[:, :4] = refpix_flags + model.dq[:, -4:] = refpix_flags + + bkg_mask = np.ones((2048, 2048), dtype=bool) + # With full array minux refpix available for bkg, should be sufficient + assert _sufficient_background_pixels(model.dq, bkg_mask) + + bkg_mask[4: -4, :] = 0 + bkg_mask[:, 4: -4] = 0 + # Now mask out entire array, mocking full source coverage of detector - + # no pixels should be available for bkg + assert not _sufficient_background_pixels(model.dq, bkg_mask) + + +def test_weighted_mean(make_wfss_datamodel, bkg_file): + + sci = make_wfss_datamodel.data + var = make_wfss_datamodel.err**2 + with datamodels.open(bkg_file) as bkg_model: + bkg = bkg_model.data + + # instantiate scaling factor computer + rescaler = _ScalingFactorComputer() + + # just get the weighted mean without iteration + factor = rescaler.err_weighted_mean(sci, bkg, var) + original_data_mean = make_wfss_datamodel.original_data_mean + expected_factor = BKG_SCALING+original_data_mean + assert np.isclose(factor, expected_factor, atol=1e-3) + + # ensure it still works after iteration + for niter in [1,2,5]: + for p in [2, 0.5, 0.1]: + rescaler = rescaler = _ScalingFactorComputer(p=p, maxiter=niter) + factor, mask_out = rescaler(sci, bkg, var) + mask_fraction = np.sum(mask_out)/mask_out.size + max_mask_fraction = p*niter*2 + INITIAL_NAN_FRACTION + + assert np.isclose(factor, expected_factor, atol=1e-3) + assert mask_fraction <= max_mask_fraction + assert mask_fraction > INITIAL_NAN_FRACTION + + # TODO: test that this works with variance stopping criterion + # TODO: test invalid inputs + # TODO: test passing in all the different options + From 3b2f9f42d430a4463ef6d43e31d9815b7930d601 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 6 Dec 2024 10:31:59 -0500 Subject: [PATCH 07/19] pass dispersion axis into scaling factor computer, add more tests --- jwst/background/background_step.py | 7 +-- jwst/background/background_sub_wfss.py | 14 +++++- jwst/background/tests/test_background_wfss.py | 45 +++++++++++++------ 3 files changed, 49 insertions(+), 17 deletions(-) diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index b7325c80bd..217b93ba83 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -2,7 +2,8 @@ from stdatamodels.jwst import datamodels from ..stpipe import Step -from . import background_sub, background_sub_wfss +from .background_sub import background_sub +from .background_sub_wfss import subtract_wfss_bkg import numpy as np __all__ = ["BackgroundStep"] @@ -60,7 +61,7 @@ def process(self, input, bkg_list): wlrange_name) # Do the background subtraction for WFSS/GRISM data - result = background_sub_wfss.subtract_wfss_bkg( + result = subtract_wfss_bkg( input_model, bkg_name, wlrange_name, self.wfss_mmag_extract) if result is None: result = input_model @@ -88,7 +89,7 @@ def process(self, input, bkg_list): break # Do the background subtraction if do_sub: - bkg_model, result = background_sub.background_sub(input_model, + bkg_model, result = background_sub(input_model, bkg_list, self.sigma, self.maxiters) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index 008fbc1912..a72ad90cdc 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -44,7 +44,18 @@ def subtract_wfss_bkg( """ bkg_ref = datamodels.open(bkg_filename) - + + # get the dispersion axis + try: + dispaxis = input_model.meta.wcsinfo.dispersion_direction + except AttributeError: + log.warning("Dispersion axis not found in input science image metadata. " + "Variance stopping criterion will have no effect for iterative " + "outlier rejection (will run until maxiter).") + dispaxis = None + rescaler_kwargs["dispersion_axis"] = dispaxis + + # get the source catalog for masking if hasattr(input_model.meta, "source_catalog"): got_catalog = True else: @@ -161,6 +172,7 @@ def __call__(self, sci, bkg, var, mask=None): rms_resid = self._compute_rms_residual(sci_sub) i += 1 + self._iters_run_last_call = i return self.err_weighted_mean(sci, bkg, var), mask diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py index 4e8447f2d6..cc44e756d8 100644 --- a/jwst/background/tests/test_background_wfss.py +++ b/jwst/background/tests/test_background_wfss.py @@ -127,14 +127,6 @@ def bkg_file(tmp_cwd, make_wfss_datamodel, known_bkg): return bkg_fname -def test_make_wfss_ref_bkg(make_wfss_ref_bkg): - bkg_model = datamodels.open(make_wfss_ref_bkg) - bkg = bkg_model.data - import matplotlib.pyplot as plt - plt.imshow(bkg, origin='lower') - plt.show() - - filter_list = ['F250M', 'F277W', 'F335M', 'F356W', 'F460M', 'F356W', 'F410M', 'F430M', 'F444W'] # + ['F480M', 'F322W2', 'F300M'] @@ -254,7 +246,9 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): # ensure it still works after iteration for niter in [1,2,5]: for p in [2, 0.5, 0.1]: - rescaler = rescaler = _ScalingFactorComputer(p=p, maxiter=niter) + rescaler = _ScalingFactorComputer(p=p, maxiter=niter) + assert rescaler.rms_thresh == -1 #check rms_thresh=None input sets thresh properly + factor, mask_out = rescaler(sci, bkg, var) mask_fraction = np.sum(mask_out)/mask_out.size max_mask_fraction = p*niter*2 + INITIAL_NAN_FRACTION @@ -263,7 +257,32 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): assert mask_fraction <= max_mask_fraction assert mask_fraction > INITIAL_NAN_FRACTION - # TODO: test that this works with variance stopping criterion - # TODO: test invalid inputs - # TODO: test passing in all the different options - + # test that variance stopping criterion works + # tune the RMS thresh to take roughly half the iterations + # need lots of significant digits here because iterating makes little difference + # for this test case + maxiter = 10 + rms_thresh = 0.0215 + rescaler = _ScalingFactorComputer(p=0.5, dispersion_axis=1, rms_thresh=rms_thresh, maxiter=maxiter) + factor, mask_out = rescaler(sci, bkg, var) + assert rescaler._iters_run_last_call < maxiter + # ensure the final answer variance is smaller than rms_thresh + sci[mask_out] = np.nan + bkg[mask_out] = np.nan + var[mask_out] = np.nan + final_factor = rescaler.err_weighted_mean(sci, bkg, var) + final_sub = sci - final_factor*bkg + assert rescaler._compute_rms_residual(final_sub) <= rms_thresh + + # test putting mask=None works ok, and that maxiter=0 just gives you err weighted mean + rescaler = _ScalingFactorComputer(maxiter=0) + factor, mask_out = rescaler(sci, bkg, var) + assert np.all(mask_out == 0) + assert factor == rescaler.err_weighted_mean(sci, bkg, var) + + # test invalid inputs + with pytest.raises(ValueError): + rescaler = _ScalingFactorComputer(dispersion_axis=5, rms_thresh=1) + + with pytest.raises(ValueError): + rescaler = _ScalingFactorComputer(dispersion_axis=None, rms_thresh=1) From 8df0f613fb8cb46d9f5ef4110c739c2ccf519005 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 9 Dec 2024 11:31:21 -0500 Subject: [PATCH 08/19] fixed dispersion axis calculation and zero-valued variance problem --- jwst/background/background_sub_wfss.py | 18 ++++++++++++------ jwst/background/tests/test_background_wfss.py | 9 ++++++++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index a72ad90cdc..b985d8014d 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -87,7 +87,7 @@ def subtract_wfss_bkg( result.data = input_model.data - subtract_this result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) - log.info(f"Average of background image subtracted = {np.nanmean(subtract_this):.5f}") + log.info(f"Average of background image subtracted = {np.nanmean(subtract_this):.3e}") bkg_ref.close() @@ -115,7 +115,7 @@ def __init__(self, p=1.0, maxiter=5, rms_thresh=None, dispersion_axis=None): """ if rms_thresh is None: rms_thresh = -1 - if (rms_thresh > 0) and (dispersion_axis not in [0,1]): + if (rms_thresh > 0) and (dispersion_axis not in [1,2]): msg = (f"Unrecognized dispersion axis {dispersion_axis}. " "Dispersion axis must be specified if rms_thresh " "is used as a stopping criterion.") @@ -177,6 +177,9 @@ def __call__(self, sci, bkg, var, mask=None): def err_weighted_mean(self, sci, bkg, var): + """Remove any var=0 values, which can happen for real data""" + mask = (var == 0) + self._update_nans(sci, bkg, var, mask) return np.nansum(sci*bkg/var) / np.nansum(bkg*bkg/var) @@ -187,13 +190,16 @@ def _update_nans(self, sci, bkg, var, mask): def _compute_rms_residual(self, sci_sub): - """Calculate the RMS of the cross-dispersion background, which is found by taking - the median along the dispersion axis""" - sci_sub_profile = np.nanmedian(sci_sub, axis=self.dispersion_axis) + """ + Calculate the RMS of the cross-dispersion background, which is found by taking + the median along the dispersion axis. + Note meta.wcsinfo.dispersion_axis is 1-indexed coming out of assign_wcs, i.e., in [1,2]. + So we need to """ + collapsing_axis = int(not bool(self.dispersion_axis - 1)) + sci_sub_profile = np.nanmedian(sci_sub, axis=collapsing_axis) return np.sqrt(np.nanmean(sci_sub_profile**2)) - def _sufficient_background_pixels(dq_array, bkg_mask, min_pixels=100): """Count number of good pixels for background use. diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py index cc44e756d8..ac57b38fe3 100644 --- a/jwst/background/tests/test_background_wfss.py +++ b/jwst/background/tests/test_background_wfss.py @@ -233,6 +233,13 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): var = make_wfss_datamodel.err**2 with datamodels.open(bkg_file) as bkg_model: bkg = bkg_model.data + + # put 0.1% zero values in variance to ensure coverage of previous bug where zero-valued + # variances in real data caused factor = 1/np.inf = 0 + rng = np.random.default_rng(seed=42) + n_bad = int(var.size / 1000) + bad_i = rng.choice(var.size-1, n_bad) + var[np.unravel_index(bad_i, var.shape)] = 0.0 # instantiate scaling factor computer rescaler = _ScalingFactorComputer() @@ -262,7 +269,7 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): # need lots of significant digits here because iterating makes little difference # for this test case maxiter = 10 - rms_thresh = 0.0215 + rms_thresh = 0.0217 rescaler = _ScalingFactorComputer(p=0.5, dispersion_axis=1, rms_thresh=rms_thresh, maxiter=maxiter) factor, mask_out = rescaler(sci, bkg, var) assert rescaler._iters_run_last_call < maxiter From 39d2165d7e921909a0d05ff83185fef0a530bbc3 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 9 Dec 2024 11:50:01 -0500 Subject: [PATCH 09/19] fixed dispersion axis computation again --- jwst/background/background_sub_wfss.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index b985d8014d..ec6d166b36 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -42,7 +42,6 @@ def subtract_wfss_bkg( result : JWST data model background-subtracted target data model """ - bkg_ref = datamodels.open(bkg_filename) # get the dispersion axis @@ -195,7 +194,7 @@ def _compute_rms_residual(self, sci_sub): the median along the dispersion axis. Note meta.wcsinfo.dispersion_axis is 1-indexed coming out of assign_wcs, i.e., in [1,2]. So we need to """ - collapsing_axis = int(not bool(self.dispersion_axis - 1)) + collapsing_axis = int(self.dispersion_axis - 1) sci_sub_profile = np.nanmedian(sci_sub, axis=collapsing_axis) return np.sqrt(np.nanmean(sci_sub_profile**2)) From 216df40fb7222bf07df67b1bf6e3874a1084b884 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Dec 2024 13:03:46 -0500 Subject: [PATCH 10/19] fractional rms improvement stopping criterion --- jwst/background/background_step.py | 13 ++++- jwst/background/background_sub_wfss.py | 54 +++++++++++------- jwst/background/tests/test_background_wfss.py | 57 ++++++++++++------- 3 files changed, 83 insertions(+), 41 deletions(-) diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index 217b93ba83..03306815e5 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -20,6 +20,9 @@ class BackgroundStep(Step): sigma = float(default=3.0) # Clipping threshold maxiters = integer(default=None) # Number of clipping iterations wfss_mmag_extract = float(default=None) # WFSS minimum abmag to extract + wfss_maxiter = integer(default=5) # WFSS iterative outlier rejection max iterations + wfss_p_rms = float(default=1) # WFSS iterative outlier rejection RMS improvement threshold (percent) + wfss_p = float(default=1) # WFSS outlier percentile to reject per iteration """ # These reference files are only used for WFSS/GRISM data. @@ -61,8 +64,16 @@ def process(self, input, bkg_list): wlrange_name) # Do the background subtraction for WFSS/GRISM data + rescaler_kwargs = {"p": self.wfss_p, + "maxiter": self.wfss_maxiter, + "delta_rms_thresh": self.wfss_p_rms/100, + } result = subtract_wfss_bkg( - input_model, bkg_name, wlrange_name, self.wfss_mmag_extract) + input_model, + bkg_name, + wlrange_name, + self.wfss_mmag_extract, + rescaler_kwargs=rescaler_kwargs) if result is None: result = input_model result.meta.cal_step.back_sub = 'SKIPPED' diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index ec6d166b36..5db477bf3c 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -73,6 +73,7 @@ def subtract_wfss_bkg( bkg_mask = np.ones(input_model.data.shape, dtype=bool) # compute scaling factor for the reference background image + log.info("Starting iterative outlier rejection for background subtraction.") rescaler = _ScalingFactorComputer(**rescaler_kwargs) factor, _ = rescaler(input_model.data.copy(), bkg_ref.data.copy(), @@ -95,34 +96,35 @@ def subtract_wfss_bkg( class _ScalingFactorComputer: - def __init__(self, p=1.0, maxiter=5, rms_thresh=None, dispersion_axis=None): + def __init__(self, p=1.0, maxiter=5, delta_rms_thresh=0, dispersion_axis=None): """ Parameters ---------- p: float, optional - Percentile for sigma clipping on both low and high ends, default 1.0. + Percentile for sigma clipping on both low and high ends per iteration, default 1.0. For example, with p=2.0, the middle 96% of the data is kept. maxiter: int, optional Maximum number of iterations for outlier rejection. Default 5. - rms_thresh: float, optional + delta_rms_thresh: float, optional Stopping criterion for outlier rejection; stops when the rms residuals - are less than this threshold. Default None, i.e., ignore this - and only stop at maxiter. + change by less than this fractional threshold in a single iteration. + For example, assuming delta_rms_thresh=0.1 and a residual RMS of 100 + in iteration 1, the iteration will stop if the RMS residual in iteration + 2 is greater than 90. + Default 0.0, i.e., ignore this and only stop at maxiter. dispersion_axis: int, optional The index to select the along-dispersion axis. Used to compute the RMS residual, so must be set if rms_thresh > 0. Default None. """ - if rms_thresh is None: - rms_thresh = -1 - if (rms_thresh > 0) and (dispersion_axis not in [1,2]): + if (delta_rms_thresh > 0) and (dispersion_axis not in [1,2]): msg = (f"Unrecognized dispersion axis {dispersion_axis}. " - "Dispersion axis must be specified if rms_thresh " + "Dispersion axis must be specified if delta_rms_thresh " "is used as a stopping criterion.") raise ValueError(msg) self.p = p self.maxiter = maxiter - self.rms_thresh = rms_thresh + self.delta_rms_thresh = delta_rms_thresh self.dispersion_axis = dispersion_axis @@ -153,23 +155,37 @@ def __call__(self, sci, bkg, var, mask=None): self._update_nans(sci, bkg, var, mask) # iteratively reject more and more outliers - rms_resid = np.inf i = 0 - while (i < self.maxiter) and (rms_resid > self.rms_thresh): + last_rms_resid = np.inf + while (i < self.maxiter): - # Reject outliers based on residual between sci and bkg. - # Updating the sci, var, and bkg nan values - # means they are ignored by nanpercentile in the next iteration + # compute the factor that minimizes the residuals factor = self.err_weighted_mean(sci, bkg, var) sci_sub = sci-factor*bkg + + # Check fractional improvement stopping criterion before incrementing. + # Note this never passes in iteration 0 because last_rms_resid is inf. + if self.delta_rms_thresh > 0: + rms_resid = self._compute_rms_residual(sci_sub) + fractional_diff = (last_rms_resid - rms_resid)/last_rms_resid + if fractional_diff < self.delta_rms_thresh: + msg = (f"Stopping at iteration {i}; too little improvement " + "since last iteration (hit delta_rms_thresh).") + log.info(msg) + break + last_rms_resid = rms_resid + + i += 1 + + # Reject outliers based on residual between sci and bkg. + # Updating the sci, var, and bkg nan values means that + # they are ignored by nanpercentile in the next iteration limits = np.nanpercentile(sci_sub, (self.p, 100-self.p)) mask += np.logical_or(sci_sub < limits[0], sci_sub > limits[1]) self._update_nans(sci, bkg, var, mask) - # update stop conditions - if (self.rms_thresh > 0): - rms_resid = self._compute_rms_residual(sci_sub) - i += 1 + if i >= self.maxiter: + log.info(f"Stopped at maxiter ({i}).") self._iters_run_last_call = i return self.err_weighted_mean(sci, bkg, var), mask diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py index ac57b38fe3..e78b9f28af 100644 --- a/jwst/background/tests/test_background_wfss.py +++ b/jwst/background/tests/test_background_wfss.py @@ -14,6 +14,7 @@ BKG_SCALING = 0.123 DETECTOR_SHAPE = (2048, 2048) INITIAL_NAN_FRACTION = 1e-4 +INITIAL_OUTLIER_FRACTION = 1e-3 @pytest.fixture(scope="module") def known_bkg(): @@ -47,6 +48,14 @@ def mock_data(known_bkg): err[nan_indices] = np.nan original_data_mean = np.nanmean(data) + # add some outliers + num_outliers = int(data.size * INITIAL_OUTLIER_FRACTION) + outlier_indices = np.unravel_index(rng.choice(data.size, num_outliers), data.shape) + data[outlier_indices] = rng.normal(100, 1, num_outliers) + + data[nan_indices] = np.nan + err[nan_indices] = np.nan + # also add a small background to the data with same structure # as the known reference background to see if it will get removed data += known_bkg*BKG_SCALING @@ -55,7 +64,7 @@ def mock_data(known_bkg): @pytest.fixture(scope='module') -def make_wfss_datamodel(data_path, mock_data, known_bkg): +def make_wfss_datamodel(data_path, mock_data): """Generate WFSS Observation""" wcsinfo = { @@ -160,8 +169,7 @@ def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_data # ensure NaN fraction did not increase. Rejecting outliers during determination # of factor should not have carried over into result. nan_frac = np.sum(np.isnan(result.data))/result.data.size - rtol = 1/(result.data.size*INITIAL_NAN_FRACTION) - assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=rtol) + assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=1e-2) # re-mask data so "real" sources are ignored here mask = _mask_from_source_cat(result, wavelenrange) @@ -170,8 +178,11 @@ def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_data # test that the background has been subtracted from the data to within some fraction of # the noise in the data. There's probably a first-principles way to determine the tolerance, # but this is ok for the purposes of this test. - tol = 0.01*np.nanstd(result.data) - assert np.isclose(np.nanmean(result.data), result.original_data_mean, atol=tol) + sci = result.data.copy() + # ignore the outliers for the purposes of this test + sci[sci>50] = np.nan + tol = 0.01*np.nanstd(sci) + assert np.isclose(np.nanmean(sci), result.original_data_mean, atol=tol) @pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) @@ -196,15 +207,20 @@ def test_nis_wfss_background(filters, pupils, make_wfss_datamodel, bkg_file): # ensure NaN fraction did not increase. Rejecting outliers during determination # of factor should not have carried over into result. nan_frac = np.sum(np.isnan(result.data))/result.data.size - rtol = 1/(result.data.size*INITIAL_NAN_FRACTION) - assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=rtol) + assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=1e-2) # re-mask data so "real" sources are ignored here mask = _mask_from_source_cat(result, wavelenrange) data = result.data[mask] - tol = 0.01*np.nanstd(result.data) - assert np.isclose(np.nanmean(result.data), result.original_data_mean, atol=tol) + # test that the background has been subtracted from the data to within some fraction of + # the noise in the data. There's probably a first-principles way to determine the tolerance, + # but this is ok for the purposes of this test. + sci = result.data.copy() + # ignore the outliers for the purposes of this test + sci[sci>50] = np.nan + tol = 0.01*np.nanstd(sci) + assert np.isclose(np.nanmean(sci), result.original_data_mean, atol=tol) def test_sufficient_background_pixels(): @@ -245,6 +261,8 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): rescaler = _ScalingFactorComputer() # just get the weighted mean without iteration + # to check it's as expected, mask outliers + sci[sci>50] = np.nan factor = rescaler.err_weighted_mean(sci, bkg, var) original_data_mean = make_wfss_datamodel.original_data_mean expected_factor = BKG_SCALING+original_data_mean @@ -254,7 +272,7 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): for niter in [1,2,5]: for p in [2, 0.5, 0.1]: rescaler = _ScalingFactorComputer(p=p, maxiter=niter) - assert rescaler.rms_thresh == -1 #check rms_thresh=None input sets thresh properly + assert rescaler.delta_rms_thresh == 0 #check rms_thresh=None input sets thresh properly factor, mask_out = rescaler(sci, bkg, var) mask_fraction = np.sum(mask_out)/mask_out.size @@ -269,17 +287,14 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): # need lots of significant digits here because iterating makes little difference # for this test case maxiter = 10 - rms_thresh = 0.0217 - rescaler = _ScalingFactorComputer(p=0.5, dispersion_axis=1, rms_thresh=rms_thresh, maxiter=maxiter) + delta_rms_thresh = 1e-4 + p = 100*INITIAL_OUTLIER_FRACTION/2 + rescaler = _ScalingFactorComputer(p=p, + dispersion_axis=1, + delta_rms_thresh=delta_rms_thresh, + maxiter=maxiter) factor, mask_out = rescaler(sci, bkg, var) assert rescaler._iters_run_last_call < maxiter - # ensure the final answer variance is smaller than rms_thresh - sci[mask_out] = np.nan - bkg[mask_out] = np.nan - var[mask_out] = np.nan - final_factor = rescaler.err_weighted_mean(sci, bkg, var) - final_sub = sci - final_factor*bkg - assert rescaler._compute_rms_residual(final_sub) <= rms_thresh # test putting mask=None works ok, and that maxiter=0 just gives you err weighted mean rescaler = _ScalingFactorComputer(maxiter=0) @@ -289,7 +304,7 @@ def test_weighted_mean(make_wfss_datamodel, bkg_file): # test invalid inputs with pytest.raises(ValueError): - rescaler = _ScalingFactorComputer(dispersion_axis=5, rms_thresh=1) + rescaler = _ScalingFactorComputer(dispersion_axis=5, delta_rms_thresh=1) with pytest.raises(ValueError): - rescaler = _ScalingFactorComputer(dispersion_axis=None, rms_thresh=1) + rescaler = _ScalingFactorComputer(dispersion_axis=None, delta_rms_thresh=1) From 964ec59fd3671970ef891f63a8ec89eac2a7b4b3 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Dec 2024 13:25:16 -0500 Subject: [PATCH 11/19] update docs for iterative background sub --- docs/jwst/background_step/arguments.rst | 17 +++++++++++++++++ docs/jwst/background_step/description.rst | 8 ++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/docs/jwst/background_step/arguments.rst b/docs/jwst/background_step/arguments.rst index 576260ef66..1f1d84c0c1 100644 --- a/docs/jwst/background_step/arguments.rst +++ b/docs/jwst/background_step/arguments.rst @@ -27,3 +27,20 @@ control the sigma clipping, and are passed as arguments to the astropy Sets the minimum (faintest) magnitude limit to use when selecting sources from the WFSS source catalog, based on the value of `isophotal_abmag` in the source catalog. Defaults to ``None``. + +``--wfss_maxiter`` + Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. + Sets the maximum number of iterations allowed for iterative outlier rejection + during determination of the reference background scaling factor. Defaults to 5. + +``wfss_p_rms`` + Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. + Sets the minimum percentage difference in the RMS of the background-subtracted image + between iterations to continue the iterative outlier rejection process. + Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``. + +``wfss_p`` + Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. + Sets the percentile of outliers in the data to reject on both the low and high end + per iteration during determination of the reference background scaling factor. + Defaults to 1, i.e., keep the middle 98 percent of the data. diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst index cc8162f681..42294e3c90 100644 --- a/docs/jwst/background_step/description.rst +++ b/docs/jwst/background_step/description.rst @@ -89,8 +89,12 @@ Before being subtracted, the background reference image is scaled to match the signal level of the WFSS image within background (source-free) regions of the image. The scaling factor is determined based on the variance-weighted mean of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``. -Prior to this computation, the data are sigma-clipped according to the ratio -``sci/bkg``, rejecting pixels where that ratio is in the 1st or 99th percentile. +Outliers are rejected iteratively during determination of the scaling factor +according to the percentile threshold and stopping criteria set using the +``wfss_p``, ``wfss_p_rms``, and ``wfss_maxiter`` step arguments in order to avoid +biasing the scaling factor based on outliers. Note, however, that the final output +products do not contain any new outliers; more robust outlier detection is performed +later on in the pipeline. The locations of source spectra are determined from a source catalog (specified by the primary header keyword SCATFILE), in conjunction with a reference file From 6e2fda31e4609bd43cd50ec433bda425dad56e4f Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Dec 2024 13:43:47 -0500 Subject: [PATCH 12/19] added warning filters --- jwst/background/background_sub_wfss.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index 5db477bf3c..35697c4e73 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -1,5 +1,6 @@ import math import numpy as np +import warnings from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels.dqflags import pixel @@ -75,10 +76,14 @@ def subtract_wfss_bkg( # compute scaling factor for the reference background image log.info("Starting iterative outlier rejection for background subtraction.") rescaler = _ScalingFactorComputer(**rescaler_kwargs) - factor, _ = rescaler(input_model.data.copy(), - bkg_ref.data.copy(), - input_model.err.copy()**2, - mask=~bkg_mask) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + category=RuntimeWarning, + message="All-NaN slice encountered") + factor, _ = rescaler(input_model.data.copy(), + bkg_ref.data.copy(), + input_model.err.copy()**2, + mask=~bkg_mask) # extract the derived factor and apply it to the unmasked, non-outlier-rejected data subtract_this = factor * bkg_ref.data @@ -167,7 +172,11 @@ def __call__(self, sci, bkg, var, mask=None): # Note this never passes in iteration 0 because last_rms_resid is inf. if self.delta_rms_thresh > 0: rms_resid = self._compute_rms_residual(sci_sub) - fractional_diff = (last_rms_resid - rms_resid)/last_rms_resid + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", + category=RuntimeWarning, + message="invalid value encountered in scalar divide") + fractional_diff = (last_rms_resid - rms_resid)/last_rms_resid if fractional_diff < self.delta_rms_thresh: msg = (f"Stopping at iteration {i}; too little improvement " "since last iteration (hit delta_rms_thresh).") From 622183be28d182aa058768fd82e66b9ffc47fcb7 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Dec 2024 13:55:54 -0500 Subject: [PATCH 13/19] caught some typos during self review --- docs/jwst/background_step/arguments.rst | 9 +++++---- docs/jwst/background_step/description.rst | 8 +++----- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/docs/jwst/background_step/arguments.rst b/docs/jwst/background_step/arguments.rst index 1f1d84c0c1..31f0e9cdfb 100644 --- a/docs/jwst/background_step/arguments.rst +++ b/docs/jwst/background_step/arguments.rst @@ -35,12 +35,13 @@ control the sigma clipping, and are passed as arguments to the astropy ``wfss_p_rms`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. - Sets the minimum percentage difference in the RMS of the background-subtracted image - between iterations to continue the iterative outlier rejection process. + If the percentage difference in the RMS of the background-subtracted image + between iterations is smaller than this value, stop the iterative outlier + rejection process. Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``. ``wfss_p`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. Sets the percentile of outliers in the data to reject on both the low and high end - per iteration during determination of the reference background scaling factor. - Defaults to 1, i.e., keep the middle 98 percent of the data. + per iteration during determination of the reference background scaling factor + Defaults to 1, i.e., keep the middle 98 percent of the data each iteration. diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst index 42294e3c90..f5b1ea8334 100644 --- a/docs/jwst/background_step/description.rst +++ b/docs/jwst/background_step/description.rst @@ -90,11 +90,9 @@ signal level of the WFSS image within background (source-free) regions of the image. The scaling factor is determined based on the variance-weighted mean of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``. Outliers are rejected iteratively during determination of the scaling factor -according to the percentile threshold and stopping criteria set using the -``wfss_p``, ``wfss_p_rms``, and ``wfss_maxiter`` step arguments in order to avoid -biasing the scaling factor based on outliers. Note, however, that the final output -products do not contain any new outliers; more robust outlier detection is performed -later on in the pipeline. +in order to avoid biasing the scaling factor based on outliers. The iterative +rejection process is controlled by the +``wfss_p``, ``wfss_p_rms``, and ``wfss_maxiter`` step arguments. The locations of source spectra are determined from a source catalog (specified by the primary header keyword SCATFILE), in conjunction with a reference file From 3ee9cddb7df9286fdf908466d8df29ba3499ce91 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Wed, 11 Dec 2024 14:01:47 -0500 Subject: [PATCH 14/19] fix ruff style issues --- jwst/background/background_sub_wfss.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index 35697c4e73..e6cab8aff1 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -5,7 +5,7 @@ from stdatamodels.jwst import datamodels from stdatamodels.jwst.datamodels.dqflags import pixel -from ..assign_wcs.util import create_grism_bbox +from jwst.assign_wcs.util import create_grism_bbox import logging log = logging.getLogger(__name__) @@ -17,7 +17,7 @@ def subtract_wfss_bkg( bkg_filename, wl_range_name, mmag_extract=None, - rescaler_kwargs: dict = {}, + rescaler_kwargs={}, ): """Scale and subtract a background reference image from WFSS/GRISM data. From 2092a3b99bb01de45f87ad9f818d01974411bb86 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Thu, 12 Dec 2024 18:05:40 -0500 Subject: [PATCH 15/19] fixed small style things in response to review comments --- docs/jwst/background_step/arguments.rst | 4 ++-- docs/jwst/background_step/description.rst | 3 +++ jwst/background/background_step.py | 6 +++--- jwst/background/background_sub_wfss.py | 7 ++++--- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/docs/jwst/background_step/arguments.rst b/docs/jwst/background_step/arguments.rst index 31f0e9cdfb..1585a4bafb 100644 --- a/docs/jwst/background_step/arguments.rst +++ b/docs/jwst/background_step/arguments.rst @@ -33,14 +33,14 @@ control the sigma clipping, and are passed as arguments to the astropy Sets the maximum number of iterations allowed for iterative outlier rejection during determination of the reference background scaling factor. Defaults to 5. -``wfss_p_rms`` +``--wfss_p_rms`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. If the percentage difference in the RMS of the background-subtracted image between iterations is smaller than this value, stop the iterative outlier rejection process. Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``. -``wfss_p`` +``--wfss_p`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. Sets the percentile of outliers in the data to reject on both the low and high end per iteration during determination of the reference background scaling factor diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst index f5b1ea8334..58203168d4 100644 --- a/docs/jwst/background_step/description.rst +++ b/docs/jwst/background_step/description.rst @@ -89,6 +89,9 @@ Before being subtracted, the background reference image is scaled to match the signal level of the WFSS image within background (source-free) regions of the image. The scaling factor is determined based on the variance-weighted mean of the science data, i.e., ``factor = sum(sci*bkg/var) / sum(bkg*bkg/var)``. +This factor is equivalent to solving for the scaling constant applied to the +reference background that gives the maximum likelihood of matching +the science data. Outliers are rejected iteratively during determination of the scaling factor in order to avoid biasing the scaling factor based on outliers. The iterative rejection process is controlled by the diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index 03306815e5..bb7adbe98b 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -101,9 +101,9 @@ def process(self, input, bkg_list): # Do the background subtraction if do_sub: bkg_model, result = background_sub(input_model, - bkg_list, - self.sigma, - self.maxiters) + bkg_list, + self.sigma, + self.maxiters) result.meta.cal_step.back_sub = 'COMPLETE' if self.save_combined_background: comb_bkg_path = self.save_model(bkg_model, suffix=self.bkg_suffix, force=True) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index e6cab8aff1..1e9877bf22 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -92,7 +92,8 @@ def subtract_wfss_bkg( result.data = input_model.data - subtract_this result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) - log.info(f"Average of background image subtracted = {np.nanmean(subtract_this):.3e}") + log.info(f"Average of scaled background image = {np.nanmean(subtract_this):.3e}") + log.info(f"Scaling factor = {factor:.5e}") bkg_ref.close() @@ -215,8 +216,8 @@ def _update_nans(self, sci, bkg, var, mask): def _compute_rms_residual(self, sci_sub): """ - Calculate the RMS of the cross-dispersion background, which is found by taking - the median along the dispersion axis. + Calculate the background-subtracted RMS along the dispersion axis, which is found + by taking the median profile of the image collapsed along the cross-dispersion axis. Note meta.wcsinfo.dispersion_axis is 1-indexed coming out of assign_wcs, i.e., in [1,2]. So we need to """ collapsing_axis = int(self.dispersion_axis - 1) From 0d43f940f875e446a13b5898ad2d0bf9c4cef2a2 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 13 Dec 2024 10:54:40 -0500 Subject: [PATCH 16/19] Made top-level unit tests, removed unnecessary looping thru all filters --- jwst/background/background_sub_wfss.py | 10 +- jwst/background/tests/test_background_wfss.py | 162 ++++++++++++------ 2 files changed, 112 insertions(+), 60 deletions(-) diff --git a/jwst/background/background_sub_wfss.py b/jwst/background/background_sub_wfss.py index 1e9877bf22..dcfc7baee5 100644 --- a/jwst/background/background_sub_wfss.py +++ b/jwst/background/background_sub_wfss.py @@ -80,14 +80,14 @@ def subtract_wfss_bkg( warnings.filterwarnings("ignore", category=RuntimeWarning, message="All-NaN slice encountered") - factor, _ = rescaler(input_model.data.copy(), - bkg_ref.data.copy(), - input_model.err.copy()**2, - mask=~bkg_mask) + # copy to avoid propagating NaNs from iterative clipping into final product + sci = input_model.data.copy() + var = input_model.err.copy()**2 + bkg = bkg_ref.data.copy() + factor, _ = rescaler(sci, bkg, var, mask=~bkg_mask) # extract the derived factor and apply it to the unmasked, non-outlier-rejected data subtract_this = factor * bkg_ref.data - result = input_model.copy() result.data = input_model.data - subtract_this result.dq = np.bitwise_or(input_model.dq, bkg_ref.dq) diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py index e78b9f28af..5cb7f2190e 100644 --- a/jwst/background/tests/test_background_wfss.py +++ b/jwst/background/tests/test_background_wfss.py @@ -6,6 +6,7 @@ from stdatamodels.jwst import datamodels from jwst.stpipe import Step from jwst.assign_wcs import AssignWcsStep +from jwst.background import BackgroundStep from jwst.background.background_sub_wfss import (subtract_wfss_bkg, _mask_from_source_cat, _sufficient_background_pixels, @@ -124,6 +125,36 @@ def make_wfss_datamodel(data_path, mock_data): return image +@pytest.fixture +def make_nrc_wfss_datamodel(make_wfss_datamodel): + """Make a NIRCAM WFSS datamodel and call AssignWCS to populate its WCS""" + data = make_wfss_datamodel.copy() + data.meta.instrument.filter = 'F250M' + data.meta.instrument.pupil = 'GRISMC' + data.meta.instrument.detector = 'NRCALONG' + data.meta.instrument.channel = 'LONG' + data.meta.instrument.name = 'NIRCAM' + data.meta.exposure.type = 'NRC_WFSS' + data.meta.instrument.module = 'A' + result = AssignWcsStep.call(data) + + return result + + +@pytest.fixture +def make_nis_wfss_datamodel(make_wfss_datamodel): + """Make a NIRISS WFSS datamodel and call AssignWCS to populate its WCS""" + data = make_wfss_datamodel.copy() + data.meta.instrument.filter = 'GR150C' + data.meta.instrument.pupil = 'F090W' + data.meta.instrument.detector = 'NIS' + data.meta.instrument.name = 'NIRISS' + data.meta.exposure.type = 'NIS_WFSS' + result = AssignWcsStep.call(data) + + return result + + @pytest.fixture() def bkg_file(tmp_cwd, make_wfss_datamodel, known_bkg): """Mock background reference file""" @@ -136,91 +167,112 @@ def bkg_file(tmp_cwd, make_wfss_datamodel, known_bkg): return bkg_fname -filter_list = ['F250M', 'F277W', 'F335M', 'F356W', 'F460M', - 'F356W', 'F410M', 'F430M', 'F444W'] # + ['F480M', 'F322W2', 'F300M'] +def shared_tests(sci, mask, original_data_mean): + """Tests that are common to all WFSS modes + Note that NaN fraction test in test_nrc_wfss_background and test_nis_wfss_background + cannot be applied to the full run tests because the background reference files contain + NaNs in some cases (specifically for NIRISS)""" + # re-mask data so "real" sources are ignored here + sci[~mask] = np.nan -@pytest.mark.parametrize("pupils", ['GRISMC', 'GRISMR']) -@pytest.mark.parametrize("filters", filter_list) -@pytest.mark.parametrize("detectors", ['NRCALONG', 'NRCBLONG']) -def test_nrc_wfss_background(tmp_cwd, filters, pupils, detectors, make_wfss_datamodel, bkg_file): - """Test background subtraction for NIRCAM WFSS modes.""" - data = make_wfss_datamodel + # test that the background has been subtracted from the data to within some fraction of + # the noise in the data. There's probably a first-principles way to determine the tolerance, + # but this is ok for the purposes of this test. + # ignore the outliers here too + sci[sci>50] = np.nan + tol = 0.01*np.nanstd(sci) + assert np.isclose(np.nanmean(sci), original_data_mean, atol=tol) - data.meta.instrument.filter = filters - data.meta.instrument.pupil = pupils - data.meta.instrument.detector = detectors - data.meta.instrument.channel = 'LONG' - data.meta.instrument.name = 'NIRCAM' - data.meta.exposure.type = 'NRC_WFSS' - if data.meta.instrument.detector == 'NRCALONG': - data.meta.instrument.module = 'A' - elif data.meta.instrument.detector == 'NRCBLONG': - data.meta.instrument.module = 'B' +def test_nrc_wfss_background(make_nrc_wfss_datamodel, bkg_file): + """Test background subtraction for NIRCAM WFSS modes.""" + data = make_nrc_wfss_datamodel.copy() # Get References - wcs_corrected = AssignWcsStep.call(data) - wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") + wavelenrange = Step().get_reference_file(data, "wavelengthrange") # do the subtraction - result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) + result = subtract_wfss_bkg(data, bkg_file, wavelenrange) + sci = result.data.copy() # ensure NaN fraction did not increase. Rejecting outliers during determination # of factor should not have carried over into result. - nan_frac = np.sum(np.isnan(result.data))/result.data.size + nan_frac = np.sum(np.isnan(sci))/sci.size assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=1e-2) - # re-mask data so "real" sources are ignored here + # re-compute mask to ignore "real" sources for tests mask = _mask_from_source_cat(result, wavelenrange) - result.data[~mask] = np.nan - # test that the background has been subtracted from the data to within some fraction of - # the noise in the data. There's probably a first-principles way to determine the tolerance, - # but this is ok for the purposes of this test. - sci = result.data.copy() - # ignore the outliers for the purposes of this test - sci[sci>50] = np.nan - tol = 0.01*np.nanstd(sci) - assert np.isclose(np.nanmean(sci), result.original_data_mean, atol=tol) + shared_tests(sci, mask, data.original_data_mean) -@pytest.mark.parametrize("filters", ['GR150C', 'GR150R']) -@pytest.mark.parametrize("pupils", ['F090W', 'F115W', 'F140M', 'F150W', 'F158M', 'F200W']) -def test_nis_wfss_background(filters, pupils, make_wfss_datamodel, bkg_file): +# test both filters because they have opposite dispersion directions +def test_nis_wfss_background(make_nis_wfss_datamodel, bkg_file): """Test background subtraction for NIRISS WFSS modes.""" - data = make_wfss_datamodel - - data.meta.instrument.filter = filters - data.meta.instrument.pupil = pupils - data.meta.instrument.detector = 'NIS' - data.meta.instrument.name = 'NIRISS' - data.meta.exposure.type = 'NIS_WFSS' + data = make_nis_wfss_datamodel.copy() # Get References - wcs_corrected = AssignWcsStep.call(data) - wavelenrange = Step().get_reference_file(wcs_corrected, "wavelengthrange") + wavelenrange = Step().get_reference_file(data, "wavelengthrange") # do the subtraction - result = subtract_wfss_bkg(wcs_corrected, bkg_file, wavelenrange) + result = subtract_wfss_bkg(data, bkg_file, wavelenrange) + sci = result.data.copy() # ensure NaN fraction did not increase. Rejecting outliers during determination # of factor should not have carried over into result. - nan_frac = np.sum(np.isnan(result.data))/result.data.size + nan_frac = np.sum(np.isnan(sci))/sci.size assert np.isclose(nan_frac, INITIAL_NAN_FRACTION, rtol=1e-2) - # re-mask data so "real" sources are ignored here mask = _mask_from_source_cat(result, wavelenrange) - data = result.data[mask] + shared_tests(sci, mask, data.original_data_mean) + + +# test both filters because they have opposite dispersion directions +@pytest.mark.parametrize("pupil", ["GRISMC", "GRISMR"]) +def test_nrc_wfss_full_run(pupil, make_nrc_wfss_datamodel): + """Test full run of NIRCAM WFSS background subtraction. + The residual structure in the background will not look as nice as in + test_nis_wfss_background because here it's taken from a reference file, + so the bkg has real detector imperfections + while the data is synthetic and just has a mock gradient""" + data = make_nrc_wfss_datamodel.copy() + data.meta.instrument.pupil = pupil + + # do the subtraction. set all options to ensure they are at least recognized + result = BackgroundStep.call(data, None, + wfss_maxiter=8, + wfss_p=0.5, + wfss_p_rms=0,) - # test that the background has been subtracted from the data to within some fraction of - # the noise in the data. There's probably a first-principles way to determine the tolerance, - # but this is ok for the purposes of this test. sci = result.data.copy() - # ignore the outliers for the purposes of this test - sci[sci>50] = np.nan - tol = 0.01*np.nanstd(sci) - assert np.isclose(np.nanmean(sci), result.original_data_mean, atol=tol) + # re-derive mask to ignore "real" sources for tests + wavelenrange = Step().get_reference_file(data, "wavelengthrange") + mask = _mask_from_source_cat(result, wavelenrange) + shared_tests(sci, mask, data.original_data_mean) + + +@pytest.mark.parametrize("filt", ["GR150C", "GR150R"]) +def test_nis_wfss_full_run(filt, make_nis_wfss_datamodel): + """Test full run of NIRISS WFSS background subtraction. + The residual structure in the background will not look as nice as in + test_nis_wfss_background because here it's taken from a reference file, + so the bkg has real detector imperfections + while the data is synthetic and just has a mock gradient""" + data = make_nis_wfss_datamodel.copy() + data.meta.instrument.filter = filt + + # do the subtraction. set all options to ensure they are at least recognized + result = BackgroundStep.call(data, None, + wfss_maxiter=8, + wfss_p=0.5, + wfss_p_rms=0,) + + sci = result.data.copy() + # re-derive mask to ignore "real" sources for tests + wavelenrange = Step().get_reference_file(data, "wavelengthrange") + mask = _mask_from_source_cat(result, wavelenrange) + shared_tests(sci, mask, data.original_data_mean) def test_sufficient_background_pixels(): From 380d089bdc7744019d73f3727faa7906e0320bd7 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Fri, 13 Dec 2024 12:43:46 -0500 Subject: [PATCH 17/19] made new parameter names more descriptive --- docs/jwst/background_step/arguments.rst | 4 ++-- docs/jwst/background_step/description.rst | 2 +- jwst/background/background_step.py | 8 ++++---- jwst/background/tests/test_background_wfss.py | 13 ++++++------- jwst/regtest/test_associations_standards.py | 2 +- 5 files changed, 14 insertions(+), 15 deletions(-) diff --git a/docs/jwst/background_step/arguments.rst b/docs/jwst/background_step/arguments.rst index 1585a4bafb..fbee951e6f 100644 --- a/docs/jwst/background_step/arguments.rst +++ b/docs/jwst/background_step/arguments.rst @@ -33,14 +33,14 @@ control the sigma clipping, and are passed as arguments to the astropy Sets the maximum number of iterations allowed for iterative outlier rejection during determination of the reference background scaling factor. Defaults to 5. -``--wfss_p_rms`` +``--wfss_rms_stop`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. If the percentage difference in the RMS of the background-subtracted image between iterations is smaller than this value, stop the iterative outlier rejection process. Defaults to 0, i.e., do all iterations up to ``wfss_maxiter``. -``--wfss_p`` +``--wfss_outlier_percent`` Only applies to Wide Field Slitless Spectroscopy (WFSS) exposures. Sets the percentile of outliers in the data to reject on both the low and high end per iteration during determination of the reference background scaling factor diff --git a/docs/jwst/background_step/description.rst b/docs/jwst/background_step/description.rst index 58203168d4..3934b1db4c 100644 --- a/docs/jwst/background_step/description.rst +++ b/docs/jwst/background_step/description.rst @@ -95,7 +95,7 @@ the science data. Outliers are rejected iteratively during determination of the scaling factor in order to avoid biasing the scaling factor based on outliers. The iterative rejection process is controlled by the -``wfss_p``, ``wfss_p_rms``, and ``wfss_maxiter`` step arguments. +``wfss_outlier_percent``, ``wfss_rms_stop``, and ``wfss_maxiter`` step arguments. The locations of source spectra are determined from a source catalog (specified by the primary header keyword SCATFILE), in conjunction with a reference file diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index bb7adbe98b..7bfe582758 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -21,8 +21,8 @@ class BackgroundStep(Step): maxiters = integer(default=None) # Number of clipping iterations wfss_mmag_extract = float(default=None) # WFSS minimum abmag to extract wfss_maxiter = integer(default=5) # WFSS iterative outlier rejection max iterations - wfss_p_rms = float(default=1) # WFSS iterative outlier rejection RMS improvement threshold (percent) - wfss_p = float(default=1) # WFSS outlier percentile to reject per iteration + wfss_rms_stop = float(default=1) # WFSS iterative outlier rejection RMS improvement threshold (percent) + wfss_outlier_percent = float(default=1) # WFSS outlier percentile to reject per iteration """ # These reference files are only used for WFSS/GRISM data. @@ -64,9 +64,9 @@ def process(self, input, bkg_list): wlrange_name) # Do the background subtraction for WFSS/GRISM data - rescaler_kwargs = {"p": self.wfss_p, + rescaler_kwargs = {"p": self.wfss_outlier_percent, "maxiter": self.wfss_maxiter, - "delta_rms_thresh": self.wfss_p_rms/100, + "delta_rms_thresh": self.wfss_rms_stop/100, } result = subtract_wfss_bkg( input_model, diff --git a/jwst/background/tests/test_background_wfss.py b/jwst/background/tests/test_background_wfss.py index 5cb7f2190e..d1219fbaa1 100644 --- a/jwst/background/tests/test_background_wfss.py +++ b/jwst/background/tests/test_background_wfss.py @@ -207,7 +207,6 @@ def test_nrc_wfss_background(make_nrc_wfss_datamodel, bkg_file): shared_tests(sci, mask, data.original_data_mean) -# test both filters because they have opposite dispersion directions def test_nis_wfss_background(make_nis_wfss_datamodel, bkg_file): """Test background subtraction for NIRISS WFSS modes.""" data = make_nis_wfss_datamodel.copy() @@ -241,9 +240,9 @@ def test_nrc_wfss_full_run(pupil, make_nrc_wfss_datamodel): # do the subtraction. set all options to ensure they are at least recognized result = BackgroundStep.call(data, None, - wfss_maxiter=8, - wfss_p=0.5, - wfss_p_rms=0,) + wfss_maxiter=3, + wfss_outlier_percent=0.5, + wfss_rms_stop=0,) sci = result.data.copy() # re-derive mask to ignore "real" sources for tests @@ -264,9 +263,9 @@ def test_nis_wfss_full_run(filt, make_nis_wfss_datamodel): # do the subtraction. set all options to ensure they are at least recognized result = BackgroundStep.call(data, None, - wfss_maxiter=8, - wfss_p=0.5, - wfss_p_rms=0,) + wfss_maxiter=3, + wfss_outlier_percent=0.5, + wfss_rms_stop=0,) sci = result.data.copy() # re-derive mask to ignore "real" sources for tests diff --git a/jwst/regtest/test_associations_standards.py b/jwst/regtest/test_associations_standards.py index 6f103385ed..42950249bd 100644 --- a/jwst/regtest/test_associations_standards.py +++ b/jwst/regtest/test_associations_standards.py @@ -90,7 +90,7 @@ def __init__( MakePars('pool_030_mir_lrs_nods_bkg'), MakePars('pool_031_mir_lrs_nonod_bkg'), MakePars('pool_032_nircam_wfss'), - MakePars('pool_034_wfss_parallel'), + MakePars('pool_034_wfss_outlier_percentarallel'), ] From 841c51603866dfa302d07c72c8e5b5e1fc526500 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 16 Dec 2024 09:17:51 -0500 Subject: [PATCH 18/19] typo fix --- jwst/regtest/test_associations_standards.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwst/regtest/test_associations_standards.py b/jwst/regtest/test_associations_standards.py index 42950249bd..6f103385ed 100644 --- a/jwst/regtest/test_associations_standards.py +++ b/jwst/regtest/test_associations_standards.py @@ -90,7 +90,7 @@ def __init__( MakePars('pool_030_mir_lrs_nods_bkg'), MakePars('pool_031_mir_lrs_nonod_bkg'), MakePars('pool_032_nircam_wfss'), - MakePars('pool_034_wfss_outlier_percentarallel'), + MakePars('pool_034_wfss_parallel'), ] From 8baec7e3b9c150ebad6cde53af427d26fdc7b001 Mon Sep 17 00:00:00 2001 From: Ned Molter Date: Mon, 16 Dec 2024 16:19:44 -0500 Subject: [PATCH 19/19] update spec to make delta_rms_thresh have correct default --- jwst/background/background_step.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jwst/background/background_step.py b/jwst/background/background_step.py index 7bfe582758..a63d55ac85 100755 --- a/jwst/background/background_step.py +++ b/jwst/background/background_step.py @@ -21,7 +21,7 @@ class BackgroundStep(Step): maxiters = integer(default=None) # Number of clipping iterations wfss_mmag_extract = float(default=None) # WFSS minimum abmag to extract wfss_maxiter = integer(default=5) # WFSS iterative outlier rejection max iterations - wfss_rms_stop = float(default=1) # WFSS iterative outlier rejection RMS improvement threshold (percent) + wfss_rms_stop = float(default=0) # WFSS iterative outlier rejection RMS improvement threshold (percent) wfss_outlier_percent = float(default=1) # WFSS outlier percentile to reject per iteration """