Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JP-3677: Add maximum_shower_amplitude parameter to jump step #306

Merged
merged 11 commits into from
Dec 3, 2024
2 changes: 2 additions & 0 deletions changes/307.jump.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add maximum_shower_amplitude parameter to MIRI cosmic rays showers routine
to fix accidental flagging of bright science pixels.
45 changes: 45 additions & 0 deletions src/stcal/jump/jump.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def detect_jumps(
min_diffs_single_pass=10,
mask_persist_grps_next_int=True,
persist_grps_flagged=25,
max_shower_amplitude=2
):
"""
This is the high-level controlling routine for the jump detection process.
Expand Down Expand Up @@ -220,6 +221,8 @@ def detect_jumps(
then all differences are processed at once.
min_diffs_single_pass : int
The minimum number of groups to switch to flagging all outliers in a single pass.
max_shower_amplitude : float
The maximum possible amplitude for flagged MIRI showers in DN/s

Returns
-------
Expand Down Expand Up @@ -317,6 +320,7 @@ def detect_jumps(
)
log.info("Total snowballs = %i", total_snowballs)
number_extended_events = total_snowballs

if find_showers:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As noted in another PR, the find_showers portion of the code is always invoked in both the if and else portion of the conditional. Since it is always executed unconditionally, it should be moved outside the conditional. This applies to the expand_large_events portion of the code, too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

gdq, num_showers = find_faint_extended(
data,
Expand All @@ -335,6 +339,7 @@ def detect_jumps(
ellipse_expand=extend_ellipse_expand_ratio,
num_grps_masked=grps_masked_after_shower,
max_extended_radius=max_extended_radius,
max_shower_amplitude=max_shower_amplitude
)
log.info("Total showers= %i", num_showers)
number_extended_events = num_showers
Expand Down Expand Up @@ -482,6 +487,7 @@ def detect_jumps(
)
log.info("Total snowballs = %i", total_snowballs)
number_extended_events = total_snowballs

if find_showers:
gdq, num_showers = find_faint_extended(
data,
Expand All @@ -500,6 +506,7 @@ def detect_jumps(
ellipse_expand=extend_ellipse_expand_ratio,
num_grps_masked=grps_masked_after_shower,
max_extended_radius=max_extended_radius,
max_shower_amplitude=max_shower_amplitude
)
log.info("Total showers= %i", num_showers)
number_extended_events = num_showers
Expand Down Expand Up @@ -878,6 +885,7 @@ def near_edge(jump, low_threshold, high_threshold):
)


# MIRI cosmic ray showers code
def find_faint_extended(
indata,
ingdq,
Expand All @@ -897,6 +905,7 @@ def find_faint_extended(
num_grps_masked=25,
max_extended_radius=200,
min_diffs_for_shower=10,
max_shower_amplitude=2,
):
"""
Parameters
Expand Down Expand Up @@ -931,6 +940,8 @@ def find_faint_extended(
The upper limit for the extension of saturation and jump
minimum_sigclip_groups : int
The minimum number of groups to use sigma clipping.
max_shower_amplitude : float
The maximum amplitude of shower artifacts to correct in DN/s


Returns
Expand All @@ -948,6 +959,7 @@ def find_faint_extended(
nints = data.shape[0]
ngrps = data.shape[1]
num_grps_donotuse = 0

for integ in range(nints):
for grp in range(ngrps):
if np.all(np.bitwise_and(gdq[integ, grp, :, :], donotuse_flag)):
Expand Down Expand Up @@ -1111,6 +1123,39 @@ def find_faint_extended(
num_grps_masked=num_grps_masked,
max_extended_radius=max_extended_radius
)

# Ensure that flagging showers didn't change final fluxes by more than the allowed amount
for intg in range(nints):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this loop needed? Is it possible to operate on the full data set, taking advantage of fast numpy looping, instead of slow explicit looping?

Copy link
Contributor Author

@drlaw1558 drlaw1558 Oct 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Here the worry was that calling .copy() on the entire array could be extremely memory intensive for cases where there were a large number of integrations. Looping over the integration is less efficient, but it looks like it still only takes 3 seconds even for a TSO case broken into 18 ints, 30 groups, and 1024x1032 pixels.

# Approximate pre-shower rates
tempdata = indata[intg,:,:,:].copy()
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add spaces after ,.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

# Ignore any groups flagged in the original gdq array
tempdata[ingdq[intg,:,:,:] & donotuse_flag != 0] = np.nan
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be combined into a single computation by setting invalid_flags = donotuse_flag | sat_flag | jump_flag.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add spaces after ,.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

tempdata[ingdq[intg,:,:,:] & sat_flag != 0] = np.nan
tempdata[ingdq[intg,:,:,:] & jump_flag != 0] = np.nan
# Compute group differences
diff = np.diff(tempdata,axis=0)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning, message="All-NaN")
image1 = np.nanmean(diff,axis=0)

# Approximate post-shower rates
tempdata = indata[intg,:,:,:].copy()
# Ignore any groups flagged in the shower gdq array
tempdata[gdq[intg,:,:,:] & donotuse_flag != 0] = np.nan
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add spaces after ,.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be combined into a single computation by setting invalid_flags = donotuse_flag | sat_flag | jump_flag.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done

tempdata[gdq[intg,:,:,:] & sat_flag != 0] = np.nan
tempdata[gdq[intg,:,:,:] & jump_flag != 0] = np.nan
# Compute group differences
diff = np.diff(tempdata,axis=0)
with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=RuntimeWarning, message="All-NaN")
image2 = np.nanmean(diff,axis=0)

# Revert the group flags to the pre-shower flags for any pixels whose rates
# became NaN or changed by more than the amount reasonable for a real CR shower
diff = np.abs(image1 - image2)
indx = np.where((np.isfinite(diff) == False)|(diff > max_shower_amplitude))
gdq[intg,:,indx[0],indx[1]] = ingdq[intg,:,indx[0],indx[1]]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add spaces after ,.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


return gdq, total_showers

def find_first_good_group(int_gdq, do_not_use):
Expand Down
31 changes: 1 addition & 30 deletions tests/test_jump.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ def test_find_faint_extended():
jump_flag=4,
ellipse_expand=1.,
num_grps_masked=1,
max_shower_amplitude=10
)
# Check that all the expected samples in group 2 are flagged as jump and
# that they are not flagged outside
Expand All @@ -405,36 +406,6 @@ def test_find_faint_extended():
# Check that the flags are not applied in the 3rd group after the event
assert np.all(gdq[0, 4, 12:22, 14:23]) == 0

def test_find_faint_extended():
nint, ngrps, ncols, nrows = 1, 66, 5, 5
data = np.zeros(shape=(nint, ngrps, nrows, ncols), dtype=np.float32)
gdq = np.zeros_like(data, dtype=np.uint32)
pdq = np.zeros(shape=(nrows, ncols), dtype=np.uint32)
pdq[0, 0] = 1
pdq[1, 1] = 2147483648
# pdq = np.zeros(shape=(data.shape[2], data.shape[3]), dtype=np.uint8)
gain = 4
readnoise = np.ones(shape=(nrows, ncols), dtype=np.float32) * 6.0 * gain
rng = np.random.default_rng(12345)
data[0, 1:, 14:20, 15:20] = 6 * gain * 6.0 * np.sqrt(2)
data = data + rng.normal(size=(nint, ngrps, nrows, ncols)) * readnoise
gdq, num_showers = find_faint_extended(
data,
gdq,
pdq,
readnoise * np.sqrt(2),
1,
100,
snr_threshold=3,
min_shower_area=10,
inner=1,
outer=2.6,
sat_flag=2,
jump_flag=4,
ellipse_expand=1.1,
num_grps_masked=0,
)


# No shower is found because the event is identical in all ints
def test_find_faint_extended_sigclip():
Expand Down
Loading