From 5d9663373d9e2b6a2a3170f2c40ba52a98e57487 Mon Sep 17 00:00:00 2001
From: "Warren J. Hack"
Date: Wed, 20 Oct 2021 16:46:12 -0400
Subject: [PATCH] Cherrypick for rc5 (#1169)

* Modified azure-pipelines.yml to use ubuntu-latest versus ubuntu-16.04 (#1166)

* Modified azure-pipelines.yml to use ubuntu-latest versus ubuntu-16.04.
  This was necessary as support for Ubuntu 16.04 was removed from Azure
  DevOps on 18 October 2021. See this URL for details:
  https://github.com/actions/virtual-environments/issues/3287

* Updated *all* instances of ubuntu-16.04 to ubuntu-latest.

* No longer create MVM exposure level DRC images (#1167)

* Remove unmodified input files from disk and manifest file (#1163)

* Identify and remove unmodified inputs from disk

* Apply logic in MVM classes

* Change floating point fill value from -9999.9 to -9999.0 to ease
  comparison issues. (#1165)

Co-authored-by: mdlpstsci
---
 azure-pipelines.yml                  |  4 +-
 azure-templates.yml                  |  2 +-
 drizzlepac/hapmultisequencer.py      | 78 ++++++++++++++++------------
 drizzlepac/haputils/catalog_utils.py |  6 +--
 drizzlepac/haputils/product.py       | 55 ++++++++++++++++++++
 5 files changed, 107 insertions(+), 38 deletions(-)

diff --git a/azure-pipelines.yml b/azure-pipelines.yml
index 84bdd72e2..44cb3c6c0 100644
--- a/azure-pipelines.yml
+++ b/azure-pipelines.yml
@@ -27,7 +27,7 @@ jobs:
 
 - job: 'PEP8'
   pool:
-    vmImage: 'Ubuntu-16.04'
+    vmImage: 'ubuntu-latest'
 
   steps:
   - task: UsePythonVersion@0
@@ -45,7 +45,7 @@ jobs:
 - job: 'Publish'
   dependsOn: 'Linux'
   pool:
-    vmImage: 'Ubuntu-16.04'
+    vmImage: 'ubuntu-latest'
 
   steps:
   - task: UsePythonVersion@0
diff --git a/azure-templates.yml b/azure-templates.yml
index 0e0fdd0bc..b6149286b 100644
--- a/azure-templates.yml
+++ b/azure-templates.yml
@@ -6,7 +6,7 @@ jobs:
     ${{ if eq(parameters.os, 'macos') }}:
      vmImage: macOS-10.14
     ${{ if eq(parameters.os, 'linux') }}:
-      vmImage: ubuntu-16.04
+      vmImage: ubuntu-latest
 
   strategy:
     matrix:
diff --git a/drizzlepac/hapmultisequencer.py b/drizzlepac/hapmultisequencer.py
index ac1deaf15..f90ba6b08 100644
--- a/drizzlepac/hapmultisequencer.py
+++ b/drizzlepac/hapmultisequencer.py
@@ -197,11 +197,6 @@ def create_drizzle_products(total_obj_list, custom_limits=None):
         # Add individual single input images with updated WCS headers to manifest
         for exposure_obj in filt_obj.edp_list:
             product_list.append(exposure_obj.full_filename)
-            # Create Drizzled images for each input on SkyCell pixels
-            exposure_obj.wcs_drizzle_product(meta_wcs)
-            # Add drizzled FLC images to manifest
-            product_list.append(exposure_obj.drizzle_filename)
-            product_list.append(exposure_obj.trl_filename)
 
     # Ensure that all drizzled products have headers that are to specification
     try:
@@ -231,7 +226,7 @@
 
 
 # ----------------------------------------------------------------------------------------------------------------------
-def run_mvm_processing(input_filename, skip_gaia_alignment=False, diagnostic_mode=False,
+def run_mvm_processing(input_filename, skip_gaia_alignment=True, diagnostic_mode=False,
                        use_defaults_configs=True, input_custom_pars_file=None, output_custom_pars_file=None,
                        phot_mode="both", custom_limits=None, output_file_prefix=None,
                        log_level=logutil.logging.INFO):
@@ -420,6 +415,20 @@ def run_mvm_processing(input_filename, skip_gaia_alignment=False, diagnostic_mod
     # 9: Compare results to HLA classic counterparts (if possible)
     # if diagnostic_mode:
     #     run_sourcelist_comparison(total_obj_list, diagnostic_mode=diagnostic_mode, log_level=log_level)
+    # If we are running in diagnostic_mode, we want to see all inputs
+    del_files = []
+    # for each total product...
+    for tot_obj in total_obj_list:
+        # get the list of unmodified files and delete those files from disk
+        del_files.extend(tot_obj.verify_members(clean=not diagnostic_mode))
+
+    # Now remove those files from the manifest file
+    for f in del_files:
+        # Just in case something unexpected happened, check that the
+        # unmodified file's filename is still in product_list
+        if f in product_list:
+            # Remove filename from manifest file input
+            product_list.remove(f)
 
     # Ensure manifest file does not contain duplicate entries
     # Use of numpy.unique preserves the order of the entries in the product list
@@ -461,7 +470,7 @@
 
 def run_align_to_gaia(total_obj_list, custom_limits=None, log_level=logutil.logging.INFO, diagnostic_mode=False):
     # Run align.py on all input images sorted by overlap with GAIA bandpass
-    log.info("\n{}: Align the all filters to GAIA with the same fit".format(str(datetime.datetime.now())))
+    log.info("\n{}: Align all the filters to GAIA with the same fit".format(str(datetime.datetime.now())))
     gaia_obj = None
     # Start by creating a FilterProduct instance which includes ALL input exposures
     for tot_obj in total_obj_list:
@@ -475,35 +484,40 @@ def run_align_to_gaia(total_obj_list, custom_limits=None, log_level=logutil.logg
                 gaia_obj.configobj_pars = tot_obj.configobj_pars
             gaia_obj.add_member(exp_obj)
 
-        log.info("\n{}: Combined all filter objects in gaia_obj".format(str(datetime.datetime.now())))
-
-        # Now, perform alignment to GAIA with 'match_relative_fit' across all inputs
-        # Need to start with one filt_obj.align_table instance as gaia_obj.align_table
-        #  - append imglist from each filt_obj.align_table to the gaia_obj.align_table.imglist
-        #  - reset group_id for all members of gaia_obj.align_table.imglist to the unique incremental values
-        #  - run gaia_obj.align_table.perform_fit() with 'match_relative_fit' only
-        #  - migrate updated WCS solutions to exp_obj instances, if necessary (probably not?)
-        #  - re-run tot_obj.generate_metawcs() method to recompute total object meta_wcs based on updated
-        #    input exposure's WCSs
-        catalog_list = [gaia_obj.configobj_pars.pars['alignment'].pars_multidict['all']['run_align']['catalog_list'][0]]  # For now, just pass in a single catalog name as list
-        align_table, filt_exposures = gaia_obj.align_to_gaia(catalog_list=catalog_list,
-                                                             output=diagnostic_mode,
-                                                             fit_label='MVM')
+    log.info("\n{}: Combined all filter objects in gaia_obj".format(str(datetime.datetime.now())))
+
+    # Now, perform alignment to GAIA with 'match_relative_fit' across all inputs
+    # Need to start with one filt_obj.align_table instance as gaia_obj.align_table
+    #  - append imglist from each filt_obj.align_table to the gaia_obj.align_table.imglist
+    #  - reset group_id for all members of gaia_obj.align_table.imglist to the unique incremental values
+    #  - run gaia_obj.align_table.perform_fit() with 'match_relative_fit' only
+    #  - migrate updated WCS solutions to exp_obj instances, if necessary (probably not?)
+    #  - re-run tot_obj.generate_metawcs() method to recompute total object meta_wcs based on updated
+    #    input exposure's WCSs
+    catalog_list = [gaia_obj.configobj_pars.pars['alignment'].pars_multidict['all']['run_align']['catalog_list'][0]]  # For now, just pass in a single catalog name as list
+    align_table, filt_exposures = gaia_obj.align_to_gaia(catalog_list=catalog_list,
+                                                         output=diagnostic_mode,
+                                                         fit_label='MVM')
 
-        for tot_obj in total_obj_list:
-            _ = tot_obj.generate_metawcs(custom_limits=custom_limits)
-        log.info("\n{}: Finished aligning gaia_obj to GAIA".format(str(datetime.datetime.now())))
+    for tot_obj in total_obj_list:
+        _ = tot_obj.generate_metawcs(custom_limits=custom_limits)
+    log.info("\n{}: Finished aligning gaia_obj to GAIA".format(str(datetime.datetime.now())))
 
-        # Return the name of the alignment catalog
-        if align_table is None:
-            gaia_obj.refname = None
+    # Return the name of the alignment catalog
+    if align_table is None:
+        gaia_obj.refname = None
+    else:
+        # update all input exposures with attribute to indicate their WCS has been modified
+        for tot_obj in total_obj_list:
+            for exp_obj in tot_obj.edp_list:
+                exp_obj.input_updated = True
 
-        return gaia_obj.refname
+    return gaia_obj.refname
 
-        #
-        # Composite WCS fitting should be done at this point so that all exposures have been fit to GAIA at
-        # the same time (on the same frame)
-        #
+    #
+    # Composite WCS fitting should be done at this point so that all exposures have been fit to GAIA at
+    # the same time (on the same frame)
+    #
 
 
 # ----------------------------------------------------------------------------------------------------------------------
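
Taken together, the hapmultisequencer.py hunks above and the product.py hunks below implement one small contract: run_align_to_gaia() marks each exposure whose WCS it rewrites with input_updated = True, and any exposure still unmarked at the end of processing is removed from disk (unless diagnostic_mode is on) and dropped from the manifest. A minimal, runnable sketch of that contract, using stand-in classes rather than the actual drizzlepac product types:

    import os

    class Exposure:
        """Stand-in for the patched exposure class: a filename plus the new flag."""
        def __init__(self, filename):
            self.full_filename = filename
            self.input_updated = False   # set True when alignment rewrites the WCS

        def verify_member(self, clean=True):
            # Report (and optionally delete) the file only if it was never modified
            if self.input_updated:
                return ''
            if clean and os.path.exists(self.full_filename):
                os.remove(self.full_filename)
            return self.full_filename

    def prune_manifest(product_list, exposures, diagnostic_mode=False):
        # clean=not diagnostic_mode: in diagnostic mode every input stays on disk
        del_files = [e.verify_member(clean=not diagnostic_mode) for e in exposures]
        return [f for f in product_list if f not in del_files]

In the real pipeline the flag is set in run_align_to_gaia()'s new else branch and consumed in run_mvm_processing(), as shown in the hunks above.
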
diff --git a/drizzlepac/haputils/catalog_utils.py b/drizzlepac/haputils/catalog_utils.py
index e715e09ec..82047c590 100755
--- a/drizzlepac/haputils/catalog_utils.py
+++ b/drizzlepac/haputils/catalog_utils.py
@@ -1275,7 +1275,7 @@ def write_catalog(self, reject_catalogs):
             self.source_cat.remove_rows(slice(0, None))
 
         # Fill the nans and masked values with numeric data
-        self.source_cat = fill_nans_maskvalues (self.source_cat, fill_value=-9999.9)
+        self.source_cat = fill_nans_maskvalues(self.source_cat, fill_value=-9999.0)
 
         # Write out catalog to ecsv file
         # self.source_cat.meta['comments'] = \
@@ -2650,7 +2650,7 @@ def write_catalog(self, reject_catalogs):
             self.source_cat.remove_rows(slice(0, None))
 
         # Fill the nans and masked values with numeric data
-        self.source_cat = fill_nans_maskvalues (self.source_cat, fill_value=-9999.9)
+        self.source_cat = fill_nans_maskvalues(self.source_cat, fill_value=-9999.0)
 
         # Write out catalog to ecsv file
         self.source_cat.write(self.sourcelist_filename, format=self.catalog_format)
@@ -2746,7 +2746,7 @@ def make_wht_masks(whtarr, maskarr, scale=1.5, sensitivity=0.95, kernel=(11, 11)
 # Utility functions supporting point and segmentation catalogs
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-def fill_nans_maskvalues(catalog, fill_value=-9999.9):
+def fill_nans_maskvalues(catalog, fill_value=-9999.0):
     # Fill the masked values with fill_value - the value is truncated for int as datatype of column is known
     catalog = catalog.filled(fill_value)
 
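
Why -9999.0 "eases comparison issues" where -9999.9 did not: -9999.0 is exactly representable in both float32 and float64, while -9999.9 is not, so the old sentinel silently changes value whenever a catalog column is stored or read back in single precision. A quick demonstration (assumes only numpy):

    import numpy as np

    # The old sentinel shifts when narrowed to float32 ...
    print(float(np.float32(-9999.9)))       # -9999.900390625
    print(np.float32(-9999.9) == -9999.9)   # False: equality test against the sentinel fails

    # ... while the new one is exact at both widths
    print(np.float32(-9999.0) == -9999.0)   # True
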
diff --git a/drizzlepac/haputils/product.py b/drizzlepac/haputils/product.py
index 335f41e2f..44e5355cf 100755
--- a/drizzlepac/haputils/product.py
+++ b/drizzlepac/haputils/product.py
@@ -857,6 +857,9 @@ def __init__(self, prop_id, obset_id, instrument, detector, filename, layer, fil
         # Flag whether to use single-image CR identification with this exposure
         self.crclean = False
 
+        # Flag to indicate whether input exposure was modified (typically, with new WCS)
+        self.input_updated = False
+
         log.info("Create SkyCellExposure object:\n    {}".format(self.full_filename))
 
     def find_member(self, name):
@@ -866,6 +869,36 @@ def find_member(self, name):
         else:
             return None
 
+    def verify_member(self, clean=True):
+        """Delete member from disk if it has not been modified during processing
+
+        This method not only deletes the file from disk, but also reports the full
+        filename of that file to allow the calling routine to keep track of what
+        was deleted.
+
+        Parameters
+        ----------
+        clean : bool
+            Specify whether or not to remove file from disk. If False,
+            leave file behind, usually for debugging purposes when
+            `diagnostic_mode` is True.
+
+        Returns
+        -------
+        full_filename : str
+            Full filename of file that was deleted, IF it was not modified.
+            This value will be an empty string if the file was modified.
+
+        """
+        del_file = ''
+        if not self.input_updated:
+            if clean and os.path.exists(self.full_filename):
+                os.remove(self.full_filename)
+            del_file = self.full_filename
+
+        return del_file
+
+
     def __getattribute__(self, name):
         if name in ["generate_footprint_mask", "generate_metawcs", "meta_wcs", "mask_kws", "mask"]:
             raise AttributeError(name)
@@ -1021,6 +1054,28 @@ def add_member(self, edp):
         self.edp_list.append(edp)
         self.new_to_layer += edp.new_process
 
+    def verify_members(self, clean=True):
+        """ Verify whether input members were modified during processing.
+
+        Parameters
+        ----------
+        clean : bool
+            Specify whether or not to remove file from disk. If False,
+            leave file behind, usually for debugging purposes when
+            `diagnostic_mode` is True.
+
+        Returns
+        -------
+        del_files : list
+            List of input exposure full_filename values for all exposures
+            which were NOT modified during processing based on the
+            `input_updated` attribute of the `ExposureProduct`. This will be empty
+            if all exposures had their WCS updated, for example.
+
+        """
+        del_files = [f for f in (edp.verify_member(clean=clean) for edp in self.edp_list) if f]
+        return del_files
+
     def add_all_mvm_exposures_list(self, exp_list):
         """ Add a list containing all the MVM FLT or FLC filenames, even the
             filenames for exposures which have been previously processed.
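
The verify_member() logic added above touches only two attributes (full_filename and input_updated), so its behavior can be exercised without constructing a full SkyCellExposure. A throwaway check along those lines, assuming a drizzlepac build with this patch applied is importable:

    import os
    import tempfile
    from types import SimpleNamespace

    from drizzlepac.haputils.product import SkyCellExposure

    # An unmodified input: verify_member deletes the file and reports its name
    tmp = tempfile.NamedTemporaryFile(suffix="_flc.fits", delete=False)
    tmp.close()
    exp = SimpleNamespace(full_filename=tmp.name, input_updated=False)
    assert SkyCellExposure.verify_member(exp, clean=True) == tmp.name
    assert not os.path.exists(tmp.name)

    # A modified input (WCS updated): the file is left alone and '' is returned
    exp = SimpleNamespace(full_filename=tmp.name, input_updated=True)
    assert SkyCellExposure.verify_member(exp) == ''
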