Skip to content

Commit

Permalink
Update 3.3.x for release candidate 4 (#1164)
Browse files Browse the repository at this point in the history
* Convert the masked values/empty strings ("") and nans to a numeric indicator (#1162)

in the output catalogs so the catalogs can more easily be ingested into a
database.

* Add WCS keywords to catalog metadata (#1160)

* Improve logic for reporting nmatches (#1157)

* Improve logic for reporting nmatches

* Update nmatches in hlet file

Co-authored-by: mdlpstsci <[email protected]>
  • Loading branch information
stsci-hack and mdlpstsci authored Oct 15, 2021
1 parent b163ad6 commit 9033060
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 22 deletions.
40 changes: 27 additions & 13 deletions drizzlepac/align.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,18 +696,28 @@ def determine_fit_quality(imglist, filtered_table, catalogs_remaining, align_par
do_consistency_check = align_pars['determine_fit_quality'].get('consistency_check', True)

for item in imglist:
if item.meta['fit_info']['status'].startswith('FAILED') is False:
if item.meta['fit_info']['status'].startswith('REFERENCE'):
# Reference will never be modified, so set shifts to (0., 0.)
# so that there are entries for all images.
xshifts.append(0.0)
yshifts.append(0.0)
continue
if not item.meta['fit_info']['status'].startswith('FAILED'):
xshifts.append(item.meta['fit_info']['shift'][0])
yshifts.append(item.meta['fit_info']['shift'][1])
else:
# Fit not successful, so no shifts to collate
break

for item in imglist:
image_name = item.meta['name']
chip_num = item.meta['chip']
fitgeom = item.meta['fit_info']['fitgeom'] if 'fitgeom' in item.meta['fit_info'] else 'rscale'
fit_info = item.meta['fit_info']
fitgeom = fit_info['fitgeom'] if 'fitgeom' in fit_info else 'rscale'

log.debug("\n{}\n".format("-"*40))
log.debug("FIT being evaluated for {}".format(image_name))
log.debug(item.meta['fit_info'])
log.debug(fit_info)
log.debug("\n{}\n".format("-"*40))

# Build fit_status_dict entry
Expand All @@ -719,16 +729,20 @@ def determine_fit_quality(imglist, filtered_table, catalogs_remaining, align_par
'reason': ""}

# Handle fitting failures (no matches found or any other failure in fit)
if item.meta['fit_info']['status'].startswith("FAILED") is True:
if fit_info['status'].startswith("FAILED"):
log.warning("Alignment FAILED for {} - no processing done.".format(image_name))
overall_valid = False
continue
fit_rms_val = item.meta['fit_info']['FIT_RMS']
max_rms_val = item.meta['fit_info']['TOTAL_RMS']
# fit_rms_ra = item.meta['fit_info']['RMS_RA']
# fit_rms_dec = item.meta['fit_info']['RMS_DEC']
if fit_info['status'].startswith("REFERENCE") or 'FIT_RMS' not in fit_info:
# No fit information available for reference image
continue

fit_rms_val = fit_info['FIT_RMS']
max_rms_val = fit_info['TOTAL_RMS']
# fit_rms_ra = fit_info['RMS_RA']
# fit_rms_dec = fit_info['RMS_DEC']
# rms_ratio = abs(fit_rms_ra - fit_rms_dec) / min(fit_rms_ra, fit_rms_dec)
num_xmatches = item.meta['fit_info']['nmatches']
num_xmatches = fit_info['nmatches']
fit_status_dict[dict_key]['max_rms'] = max_rms_val
fit_status_dict[dict_key]['num_matches'] = num_xmatches

Expand Down Expand Up @@ -756,8 +770,8 @@ def determine_fit_quality(imglist, filtered_table, catalogs_remaining, align_par

radial_offset_check = False
radial_offset = math.sqrt(
float(item.meta['fit_info']['shift'][0])**2 +
float(item.meta['fit_info']['shift'][1])**2) * item.wcs.pscale # radial offset in arssec
float(fit_info['shift'][0])**2 +
            float(fit_info['shift'][1])**2) * item.wcs.pscale # radial offset in arcsec
# Without the '+2', this will always fail for xmatches < 3 regardless of how small
# the offset is: 2*0.36 == 0.72 vs 0.8 + 0 [perfect alignment]
# Adding 2 allows low offset solutions with only 1 or 2 sources to pass this check.
Expand All @@ -775,7 +789,7 @@ def determine_fit_quality(imglist, filtered_table, catalogs_remaining, align_par

consistency_check = True
if do_consistency_check:
rms_limit = max(item.meta['fit_info']['TOTAL_RMS'], 10.)
rms_limit = max(fit_info['TOTAL_RMS'], 10.)
if not math.sqrt(np.std(np.asarray(xshifts)) ** 2 + np.std(
np.asarray(yshifts)) ** 2) <= (rms_limit / align_pars['determine_fit_quality']['MAS_TO_ARCSEC']) / (item.wcs.pscale): # \
# or rms_ratio > MAX_RMS_RATIO:
Expand Down Expand Up @@ -829,7 +843,7 @@ def determine_fit_quality(imglist, filtered_table, catalogs_remaining, align_par
log.info("chip: {}".format(item.meta['chip']))
log.info("group_id: {}".format(item.meta['group_id']))
for tweakwcs_info_key in log_info_keys:
log.info("{} : {}".format(tweakwcs_info_key, item.meta['fit_info'][tweakwcs_info_key]))
log.info("{} : {}".format(tweakwcs_info_key, fit_info[tweakwcs_info_key]))
log.info("~" * 84)
log.info("nmatches_check: {} radial_offset_check: {}"
" large_rms_check: {},"
Expand Down
10 changes: 8 additions & 2 deletions drizzlepac/hapsequencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,7 @@ def create_catalog_products(total_obj_list, log_level, diagnostic_mode=False, ph
# At this point the total product catalog contains all columns contributed
# by each filter catalog. However, some of the columns originating in one or more of
# the filter catalogs contain no measurements for a particular source. Remove all
# rows which contain empty strings (masked values) for all measurements for all
# rows which contain empty strings (masked values) for *all* measurements for *all*
# of the filter catalogs.
for cat_type in total_product_catalogs.catalogs.keys():
good_rows_index = []
Expand Down Expand Up @@ -1243,6 +1243,13 @@ def update_active_wcs(filename, wcsname):
# from the headerlet extension.
try:
wcsutil.headerlet.restore_from_headerlet(filename, hdrname=hdrname, force=True)
# Update value of nmatches based on headerlet
log.debug("{}: Updating NMATCHES from EXT={}".format(filename, extensions[0]))
fhdu = fits.open(filename, mode='update')
for sciext in range(1, num_sci_ext+1):
nm = fhdu[extensions[0]].header['nmatch'] if 'nmatch' in fhdu[extensions[0]].header else 0
fhdu[(extname, sciext)].header['nmatches'] = nm
fhdu.close()
except ValueError as err:
log.warning("Trapped ValueError - attempting recovery: {}".format(str(err)))
found_string = [i for i in keyword_wcs_list if wcsname == i]
Expand All @@ -1265,7 +1272,6 @@ def update_active_wcs(filename, wcsname):
else:
log.info("No need to update active WCS solution of {} for {} as it is already the active solution.".format(wcsname, filename))


# ------------------------------------------------------------------------------


Expand Down
22 changes: 17 additions & 5 deletions drizzlepac/haputils/align_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1225,11 +1225,23 @@ def update_image_wcs_info(tweakwcs_output, headerlet_filenames=None, fit_label=N
hdr_name = "{}_{}-hlet.fits".format(image_name.rstrip(".fits"), wcs_name)
updatehdr.update_wcs(hdulist, sci_extn, item.wcs, wcsname=wcs_name, reusename=True)
info = item.meta['fit_info']
hdulist[sci_extn].header['RMS_RA'] = info['RMS_RA'].value if info['RMS_RA'] is not None else -1.0
hdulist[sci_extn].header['RMS_DEC'] = info['RMS_DEC'].value if info['RMS_DEC'] is not None else -1.0
hdulist[sci_extn].header['CRDER1'] = info['RMS_RA'].value if info['RMS_RA'] is not None else -1.0
hdulist[sci_extn].header['CRDER2'] = info['RMS_DEC'].value if info['RMS_DEC'] is not None else -1.0
hdulist[sci_extn].header['NMATCHES'] = len(info['ref_mag']) if info['ref_mag'] is not None else -1.0
if info['catalog'] and info['catalog'] != '':
hdulist[sci_extn].header['RMS_RA'] = info['RMS_RA'].value if info['RMS_RA'] is not None else -1.0
hdulist[sci_extn].header['RMS_DEC'] = info['RMS_DEC'].value if info['RMS_DEC'] is not None else -1.0
hdulist[sci_extn].header['CRDER1'] = info['RMS_RA'].value if info['RMS_RA'] is not None else -1.0
hdulist[sci_extn].header['CRDER2'] = info['RMS_DEC'].value if info['RMS_DEC'] is not None else -1.0
hdulist[sci_extn].header['NMATCHES'] = len(info['ref_mag']) if info['ref_mag'] is not None else 0
else:
hdulist[sci_extn].header['RMS_RA'] = -1.0
hdulist[sci_extn].header['RMS_DEC'] = -1.0
hdulist[sci_extn].header['CRDER1'] = -1.0
hdulist[sci_extn].header['CRDER2'] = -1.0
hdulist[sci_extn].header['NMATCHES'] = 0

# Update value of 'nmatches' in fit_info so that this value will get
# used in writing out the headerlet as a file.
info['nmatches'] = hdulist[sci_extn].header['NMATCHES']

if 'HDRNAME' in hdulist[sci_extn].header:
del hdulist[sci_extn].header['HDRNAME']
hdulist[sci_extn].header['HDRNAME'] = hdr_name
Expand Down
7 changes: 6 additions & 1 deletion drizzlepac/haputils/astrometric_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1932,7 +1932,12 @@ def check_mag_corr(imglist, threshold=0.5):
log.info("{} Magnitude correlation: {}".format(image.meta['name'], mag_corr))
cross_match_check = True if abs(mag_corr) > threshold else False
else:
cross_match_check = False
# Assume that if there is only one cross-match, that it is correct
# It may not be correct all the time, but works well for very sparse fields.
if len(input_mags) == 1:
cross_match_check = True
else:
cross_match_check = False
mag_checks.append(cross_match_check)

return mag_checks
Expand Down
51 changes: 50 additions & 1 deletion drizzlepac/haputils/catalog_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,6 +494,13 @@ def _get_header_data(self):
keyword_dict["photflam"] = proc_utils.find_flt_keyword(self.imghdu, "PHOTFLAM")
keyword_dict["photplam"] = proc_utils.find_flt_keyword(self.imghdu, "PHOTPLAM")

# Include WCS keywords as well for use in updating the output catalog metadata
drzwcs = HSTWCS(self.imghdu, ext=1)
wcshdr = drzwcs.wcs2header() # create FITS keywords with CD matrix
        # add them to the keyword_dict, kept together under their own 'wcs' key
keyword_dict['wcs'] = {}
keyword_dict['wcs'].update(wcshdr)

return keyword_dict


Expand Down Expand Up @@ -836,6 +843,11 @@ def annotate_table(self, data_table, param_dict_qc, proc_type="aperture", produc
else:
data_table.meta["Filter 1"] = self.image.keyword_dict["filter1"]
data_table.meta["Filter 2"] = self.image.keyword_dict["filter2"]

# Insert WCS keywords into metadata
# This relies on the fact that the dicts are always ordered.
data_table.meta.update(self.image.keyword_dict['wcs'])

num_sources = len(data_table)
data_table.meta["Number of sources"] = num_sources

Expand Down Expand Up @@ -868,7 +880,7 @@ def annotate_table(self, data_table, param_dict_qc, proc_type="aperture", produc
data_table.meta["h17.9"] = [" 128 - Bleeding and Cosmic Rays"]
data_table.meta["h18"] = ["#================================================================================================="]

if proc_type is "segment":
if proc_type == "segment":
if self.is_big_island:
data_table.meta["h19"] = ["WARNING: Segmentation catalog is considered to be of poor quality due to a crowded field or large segments."]

Expand Down Expand Up @@ -1245,6 +1257,14 @@ def write_catalog(self, reject_catalogs):
-------
Nothing!
The total product catalog contains several columns contributed from each filter catalog.
However, there is no guarantee that every filter catalog contains measurements for each
source in the total product catalog which is to say the row is actually missing from the
filter product catalog. When the catalog tables are combined in the combined_tables method,
the missing entries will contain masked values represented by dashes. When these values are
written to output ECSV files, they are written as empty ("") strings. In order for the catalogs
to be ingested into a database by possible downstream processing, the masked values will be
replaced by a numeric indicator.
"""
        # Ensure catalog has all necessary metadata
self.source_cat = self.annotate_table(self.source_cat, self.param_dict_qc, proc_type="aperture",
Expand All @@ -1254,6 +1274,9 @@ def write_catalog(self, reject_catalogs):
# This will delete all rows from the existing table
self.source_cat.remove_rows(slice(0, None))

# Fill the nans and masked values with numeric data
self.source_cat = fill_nans_maskvalues (self.source_cat, fill_value=-9999.9)

# Write out catalog to ecsv file
# self.source_cat.meta['comments'] = \
# ["NOTE: The X and Y coordinates in this table are 0-indexed (i.e. the origin is (0,0))."]
Expand Down Expand Up @@ -2610,6 +2633,14 @@ def write_catalog(self, reject_catalogs):
-------
Nothing
The total product catalog contains several columns contributed from each filter catalog.
However, there is no guarantee that every filter catalog contains measurements for each
source in the total product catalog which is to say the row is actually missing from the
filter product catalog. When the catalog tables are combined in the combined_tables method,
the missing entries will contain masked values represented by dashes. When these values are
written to output ECSV files, they are written as empty ("") strings. In order for the catalogs
to be ingested into a database by possible downstream processing, the masked values will be
replaced by a numeric indicator.
"""
self.source_cat = self.annotate_table(self.source_cat, self.param_dict_qc, proc_type="segment",
product=self.image.ghd_product)
Expand All @@ -2618,6 +2649,9 @@ def write_catalog(self, reject_catalogs):
# This will delete all rows from the existing table
self.source_cat.remove_rows(slice(0, None))

# Fill the nans and masked values with numeric data
self.source_cat = fill_nans_maskvalues (self.source_cat, fill_value=-9999.9)

# Write out catalog to ecsv file
self.source_cat.write(self.sourcelist_filename, format=self.catalog_format)
log.info("Wrote catalog file '{}' containing {} sources".format(self.sourcelist_filename, len(self.source_cat)))
Expand Down Expand Up @@ -2707,3 +2741,18 @@ def make_wht_masks(whtarr, maskarr, scale=1.5, sensitivity=0.95, kernel=(11, 11)
limit /= scale

return masks

# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Utility functions supporting point and segmentation catalogs
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -

def fill_nans_maskvalues(catalog, fill_value=-9999.9):
    """Replace masked entries and NaN values in a catalog with a numeric sentinel.

    Parameters
    ----------
    catalog : astropy.table.Table
        Catalog table which may contain masked entries and/or NaN values.
    fill_value : float, optional
        Numeric indicator used to replace masked/NaN entries.  For integer
        columns the value is truncated, as the datatype of each column is known.

    Returns
    -------
    astropy.table.Table
        Unmasked copy of the input catalog with the replacements applied.
    """
    # Resolve masked entries first; ``filled`` returns an unmasked copy of the
    # table with fill_value substituted (truncated for integer columns).
    unmasked = catalog.filled(fill_value)

    # Sweep every column and overwrite NaNs in place with the same sentinel.
    for column in unmasked.itercols():
        np.nan_to_num(column, copy=False, nan=fill_value)

    return unmasked

0 comments on commit 9033060

Please sign in to comment.