From 909a9aaaf49e776f5cd4b20c613cb254c27f290e Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Tue, 27 Jun 2023 09:42:20 -0400 Subject: [PATCH 01/15] First cut at removing astroquery_utils --- CHANGELOG.rst | 10 +- JenkinsfileRT | 3 +- drizzlepac/align.py | 51 +- drizzlepac/haputils/astroquery_utils.py | 169 ------ drizzlepac/haputils/hapcut_utils.py | 712 ------------------------ drizzlepac/haputils/poller_utils.py | 14 +- pyproject.toml | 1 - requirements-dev.txt | 1 - tests/hap/base_classes.py | 19 +- tests/hap/template_svm_demo.py | 6 +- tests/hap/test_align.py | 2 + tests/hap/test_alignpipe_randomlist.py | 32 +- tests/hap/test_apriori.py | 1 + tests/hap/test_pipeline.py | 6 +- tests/hap/test_randomlist.py | 1 + tests/hap/test_run_svmpoller.py | 4 +- tests/hap/test_svm_canary.py | 4 +- tests/hap/test_svm_hrcsbc.py | 4 +- tests/hap/test_svm_ibqk07.py | 4 +- tests/hap/test_svm_ibyt50.py | 4 +- tests/hap/test_svm_j97e06.py | 4 +- tests/hap/test_svm_je281u.py | 4 +- tests/hap/test_svm_wfc3ir.py | 4 +- 23 files changed, 62 insertions(+), 998 deletions(-) delete mode 100644 drizzlepac/haputils/astroquery_utils.py delete mode 100644 drizzlepac/haputils/hapcut_utils.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ddb1dfb85..d0907aa56 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -18,19 +18,15 @@ number of the code change for that issue. These PRs can be viewed at: https://github.com/spacetelescope/drizzlepac/pulls -3.6.1 (unreleased) -================== +3.6.1rc0 (unreleased) +===================== - Fixed an incompatibility in the ``minmed`` code for cosmic ray rejection with the ``numpy`` version ``>=1.25``. [#1573] - -3.6.1rc0 (15-Jun-2023) -====================== - - Force the version of matplotlib to be <= 3.6.3 as the newer versions of the library cause problems with the calcloud preview generation. This - is a temporary restriction. + is a temporary restriction. [#1571] 3.6.0 (12-Jun-2023) ====================== diff --git a/JenkinsfileRT b/JenkinsfileRT index 4bf8a4cab..7e5d82bac 100644 --- a/JenkinsfileRT +++ b/JenkinsfileRT @@ -54,4 +54,5 @@ bc4.conda_packages = ['python=3.11'] // Iterate over configurations that define the (distributed) build matrix. // Spawn a host (or workdir) for each combination and run in parallel. // Also apply the job configuration defined in `jobconfig` above. -utils.run([bc1, bc3, bc4, jobconfig]) +//utils.run([bc1, bc3, bc4, jobconfig]) +utils.run([bc1, jobconfig]) diff --git a/drizzlepac/align.py b/drizzlepac/align.py index 41255bc7f..1059bc1c0 100644 --- a/drizzlepac/align.py +++ b/drizzlepac/align.py @@ -21,7 +21,6 @@ from . import util from .haputils import astrometric_utils as amutils -from .haputils import astroquery_utils as aqutils from .haputils import get_git_rev_info from .haputils import align_utils from .haputils import config_utils @@ -56,6 +55,10 @@ def check_and_get_data(input_list: list, **pars: object) -> list: :func:`haputils/astroquery_utils/retrieve_observation` to get the files through AstroQuery. + ### Need to eliminate the use of astroquery. As such, the files to be processed MUST be + ### available on disk for processing. The user is responsible for making the data available. + ### Add code to indicate files need to be on disk. MDD + Parameters ---------- input_list : list @@ -68,7 +71,7 @@ def check_and_get_data(input_list: list, **pars: object) -> list: Returns ======= total_input_list: list - list of full filenames + list of full filenames - ipppssoot names are no longer allowed. Data must be on disk. 
See Also ======== @@ -109,50 +112,6 @@ def check_and_get_data(input_list: list, **pars: object) -> list: suffix)) return (empty_list) - # Input is an ipppssoot (association or singleton), nine characters by definition. - # This "else" block actually downloads the data specified as ipppssoot. - elif len(input_item) == 9: - try: - if input_item not in ipppssoot_list: - input_item = input_item.lower() - # An ipppssoot of an individual file which is part of an association cannot be - # retrieved from MAST - retrieve_list = aqutils.retrieve_observation(input_item, **pars) - - # If the retrieved list is not empty, add filename(s) to the total_input_list. - # Also, update the ipppssoot_list so we do not try to download the data again. Need - # to do this since retrieve_list can be empty because (1) data cannot be acquired (error) - # or (2) data is already on disk (ok). - if retrieve_list: - total_input_list += retrieve_list - ipppssoot_list.append(input_item) - else: - # log.error('File {} cannot be retrieved from MAST.'.format(input_item)) - # return(empty_list) - log.warning('File {} cannot be retrieved from MAST.'.format(input_item)) - log.warning(f" using pars: {pars}") - # look for already downloaded ASN and related files instead - # ASN filenames are the only ones that end in a digit - if input_item[-1].isdigit(): - _asn_name = f"{input_item}_asn.fits" - if not os.path.exists(_asn_name): - _ = aqutils.retrieve_observation([f"{input_item}"], - suffix=['ASN'], - clobber=True) - _local_files = _get_asn_members(_asn_name) - if _local_files: - log.warning(f"Using local files instead:\n {_local_files}") - total_input_list.extend(_local_files) - else: - _lfiles = os.listdir() - log.error(f"No suitable files found for input {input_item}") - log.error(f" in directory with files: \n {_lfiles}") - return(total_input_list) - - except Exception: - exc_type, exc_value, exc_tb = sys.exc_info() - traceback.print_exception(exc_type, exc_value, exc_tb, file=sys.stdout) - # Only the retrieve_list files via astroquery have been put into the total_input_list thus far. # Now check candidate_list to detect or acquire the requested files from MAST via astroquery. for file in candidate_list: diff --git a/drizzlepac/haputils/astroquery_utils.py b/drizzlepac/haputils/astroquery_utils.py deleted file mode 100644 index 8f3d14148..000000000 --- a/drizzlepac/haputils/astroquery_utils.py +++ /dev/null @@ -1,169 +0,0 @@ -"""Wrappers for astroquery-related functionality""" -import shutil -import os - -try: - from astroquery.mast import Observations -except FileExistsError: - Observations = None - -import sys -from stsci.tools import logutil - -__taskname__ = 'astroquery_utils' - -product_type_dict = {'HAP': 'HAP', 'pipeline': 'DADS', 'both': ''} - -MSG_DATEFMT = '%Y%j%H%M%S' -SPLUNK_MSG_FORMAT = '%(asctime)s %(levelname)s src=%(name)s- %(message)s' -log = logutil.create_logger(__name__, level=logutil.logging.NOTSET, stream=sys.stdout, - format=SPLUNK_MSG_FORMAT, datefmt=MSG_DATEFMT) - - -def retrieve_observation(obsid, suffix=['FLC'], archive=False, clobber=False, - product_type=None): - """Simple interface for retrieving an observation from the MAST archive - - If the input obsid is for an association, it will request all members with - the specified suffixes. - - Parameters - ----------- - obsid : string or list of strings - ID or list of IDs for observations to be retrieved from the MAST archive. - Only the IPPSSOOT (rootname) of exposure or ASN needs to be provided; eg., - ib6v06060. 
Additionally, a wild-carded ``obsid`` can be provided to - retrieve all products for a visit; e.g., "ib6v06*". Data from multiple - ASNs, exposures or visits can be retrieved by simply providing them as a list. - - suffix : list, optional - List containing suffixes of files which should be requested from MAST. - Default value "['FLC']". - - archive : Boolean, optional - Retain copies of the downloaded files in the astroquery created - sub-directories? Default is "False". - - clobber : Boolean, optional - Download and Overwrite existing files? Default is "False". - - product_type : str, optional - Specify what type of product you want from the archive, either 'pipeline' - or 'HAP' or 'both' (default). By default, all versions of the products - processed for the requested datasets will be returned. This would include: - - - pipeline : files processed by ``runastrodriz`` to include the latest - distortion calibrations and the best possible alignment to GAIA - with ``ipppssoot_fl[tc].fits`` filenames for FLT/FLC files. - - HAP : files processed as a single visit and aligned (as possible) to GAIA - with ``hst_______fl[tc].fits`` - filenames. - - Returns - ------- - local_files : list - List of filenames - """ - local_files = [] - - if Observations is None: - log.warning("The astroquery package was not found. No files retrieved!") - return local_files - - # Query MAST for the data with an observation type of either "science" or - # "calibration" - obs_table = Observations.query_criteria(obs_id=obsid) - - # Catch the case where no files are found for download - if not obs_table: - log.info("WARNING: Query for {} returned NO RESULTS!".format(obsid)) - return local_files - - dpobs = Observations.get_product_list(obs_table) - if product_type: - ptypes = [product_type_dict[product_type] in descr for descr in dpobs['description']] - dpobs = dpobs[ptypes] - - data_products_by_id = Observations.filter_products(dpobs, - productSubGroupDescription=suffix, - extension='fits', - mrp_only=False) - - # After the filtering has been done, ensure there is still data in the - # table for download. If the table is empty, look for FLT images in lieu - # of FLC images. Only want one or the other (not both!), so just do the - # filtering again. - if not data_products_by_id: - log.info("WARNING: No FLC files found for {} - will look for FLT " - "files instead.".format(obsid)) - suffix = ['FLT'] - data_products_by_id = Observations.filter_products(dpobs, - productSubGroupDescription=suffix, - extension='fits', - mrp_only=False) - - # If still no data, then return. An exception will eventually be - # thrown in the higher level code. - if not data_products_by_id: - log.info( - "WARNING: No FLC or FLT files found for {}.".format(obsid)) - return local_files - - all_images = data_products_by_id['productFilename'].tolist() - log.info(all_images) - if not clobber: - rows_to_remove = [] - for row_idx, row in enumerate(data_products_by_id): - fname = row['productFilename'] - if os.path.isfile(fname): - log.info(fname + " already exists. File download skipped.") - rows_to_remove.append(row_idx) - data_products_by_id.remove_rows(rows_to_remove) - - # Protect against cases where all requested observations are already - # present on local disk and clobber was turned off, so there are no - # files to be downloaded. 
- if len(data_products_by_id) == 0: - log.warning("No new files identified to be retrieved.") - return local_files - - manifest = Observations.download_products(data_products_by_id, - mrp_only=False) - - # Protect against any other problems with finding files to retrieve based on the - # input file specification. - if not manifest: - log.warning(f"File {data_products_by_id} could not be retrieved. No files returned.") - return local_files - - if not clobber: - for rownum in rows_to_remove[::-1]: - if manifest: - manifest.insert_row(rownum, - vals=[all_images[rownum], "LOCAL", "None", "None"]) - else: - return all_images - - download_dir = None - for file, file_status in zip(manifest['Local Path'], manifest['Status']): - if file_status != "LOCAL": - # Identify what sub-directory was created by astroquery for the - # download - if download_dir is None: - download_dir = os.path.dirname(os.path.abspath(file)) - # Move or copy downloaded file to current directory - local_file = os.path.abspath(os.path.basename(file)) - if not os.path.exists(file): - continue - if archive: - shutil.copy(file, local_file) - else: - shutil.move(file, local_file) - # Record what files were downloaded and their current location - local_files.append(os.path.basename(local_file)) - else: - local_files.append(file) - if not archive: - # Remove astroquery created sub-directories - shutil.rmtree('mastDownload') - return local_files diff --git a/drizzlepac/haputils/hapcut_utils.py b/drizzlepac/haputils/hapcut_utils.py deleted file mode 100644 index e6b6c9bee..000000000 --- a/drizzlepac/haputils/hapcut_utils.py +++ /dev/null @@ -1,712 +0,0 @@ -"""The module is a high-level interface to astrocut for use with HAP SVM and MVM files.""" - -from astrocut import fits_cut -from astropy import units as u -from astropy.coordinates import SkyCoord -from astropy.io import fits -from astropy.table import Table, vstack, unique -from astropy.units.quantity import Quantity -from astroquery.mast import Observations -from drizzlepac.haputils import cell_utils as cu -from pprint import pprint -from stsci.tools import logutil - -import astrocut -import copy -import glob -import math -import numpy as np -import os -import shutil -import sys - -__taskname__ = 'hapcut_utils' - -MSG_DATEFMT = '%Y%j%H%M%S' -SPLUNK_MSG_FORMAT = '%(asctime)s %(levelname)s src=%(name)s- %(message)s' -log = logutil.create_logger("hapcut", level=logutil.logging.NOTSET, stream=sys.stdout, - filename="hapcut_utility.log", format=SPLUNK_MSG_FORMAT, datefmt=MSG_DATEFMT) - - -def mvm_id_filenames(sky_coord, cutout_size, log_level=logutil.logging.INFO): - """ - This function retrieves a table of MVM drizzled image filenames with additional - information from the archive. The user can then further cull the table to use as - input to obtain a list of files from the archive. This function will return filter-level - products. At this time, both ACS and WFC3 are searched by default. - - Parameters - ---------- - sky_coord : str or `astropy.coordinates.SkyCoord `_ object - The position around which to cutout. It may be specified as a string ("ra dec" in degrees) - or as the appropriate - `astropy.coordinates.SkyCoord `_ - object. - - cutout_size : int, array-like, `astropy.units.Quantity `_ - The size of the cutout array. If ``cutout_size`` is a scalar number or a scalar - `astropy.units.Quantity `_, - then a square cutout of ``cutout_size`` will be created. - If ``cutout_size`` has two elements, they should be in ``(ny, nx)`` order. 
Scalar numbers - in ``cutout_size`` are assumed to be in units of arcseconds. - `astropy.units.Quantity `_ objects - must be in angular units. - - log_level : int, optional - The desired level of verbosity in the log statements displayed on the screen and written to the - .log file. Default value is 20, or 'info'. - - Returns - ------- - final_table : `astropy.table.Table `_ object - - This utility also writes an output ECSV file version of the in-memory filtered data product table, - final_table. The output filename is in the form: - mvm_query-ra<###>d<####>-dec<##>d<####>__cutout.ecsv - (e.g., mvm_query-ra84d8208-decs69d8516_354_cutout.ecsv, where radius has been computed from the - cutout dimensions. - """ - - # set logging level to user-specified level - log.setLevel(log_level) - - # If the cutout_size is not an astropy.units.Quantity object, the scalar(s) - # are assumed to be arcseconds. The variable must be cast as a Quantity. - if not isinstance(cutout_size, Quantity): - cutout_size *= u.arcsec - cutout_size = np.atleast_1d(cutout_size) - if len(cutout_size) == 1: - cutout_size = np.repeat(cutout_size, 2) - - if not isinstance(sky_coord, SkyCoord): - sky_coord = SkyCoord(sky_coord, unit="deg") - - # From HST data, Search for the list of images based upon: coordinates, search region, data - # product type, and the instrument name (with wildcard), project (HAP), and observation - # collection (HST). Use the wildcard to get all the detectors for the instrument. Multiple - # instruments cannot be searched at the same time. Use the diagonal of the cutout to define - # the search radius for the archive. Images which fall outside the desired cutout need to - # be filtered from the solution later. - radius = math.ceil(math.sqrt(math.pow(cutout_size.value[0], 2) + math.pow(cutout_size.value[1], 2)) / 2.0) - - # Careful - the radius must be a str or Quantity - radius *= u.arcsec - log.info("Radius for query: {}.".format(radius)) - log.info("Performing query for ACS images.") - - acs_query_table = Observations.query_criteria(coordinates=sky_coord, - radius=radius, - dataproduct_type="IMAGE", - instrument_name="ACS*", - project="HAP", - obs_collection="HST") - - log.info("Performing query for WFC3 images.") - wfc3_query_table = Observations.query_criteria(coordinates=sky_coord, - radius=radius, - dataproduct_type="IMAGE", - instrument_name="WFC3*", - project="HAP", - obs_collection="HST") - - query_table = vstack([acs_query_table, wfc3_query_table]) - del acs_query_table - del wfc3_query_table - - # Catch the case where no files are found which satisfied the Query - if not query_table: - log.warning("Query for objects within {} of {} returned NO RESULTS!".format(radius, (str_ra, str_dec))) - return query_table - - # Compute the limits of the cutout region - deg_cutout_size = cutout_size.to(u.deg) - ra_min = sky_coord.ra.degree - deg_cutout_size.value[0] - ra_max = sky_coord.ra.degree + deg_cutout_size.value[0] - dec_min = sky_coord.dec.degree - deg_cutout_size.value[1] - dec_max = sky_coord.dec.degree + deg_cutout_size.value[1] - str_ra = "{:.4f}".format(sky_coord.ra.degree) - str_dec = "{:.4f}".format(sky_coord.dec.degree) - - # Filter the output as necessary to include only MVM filenames (MVM prefix: hst_skycell). - # Also, filter out images which are not actually in the requested cutout region as the - # archive search had to be done using a radius. 
- good_rows = [] - updated_query_table = None - for old_row in query_table: - if old_row["obs_id"].startswith("hst_skycell"): - if old_row["s_ra"] >= ra_min and old_row["s_ra"] <= ra_max and \ - old_row["s_dec"] >= dec_min and old_row["s_dec"] <= dec_max: - good_rows.append(old_row) - - # Catch the case where no files are found which satisfy the clean up criteria - if len(good_rows) == 0: - log.warning("Query for objects within cutout {} of {} returned NO RESULTS!".format(cutout_size, (str_ra, str_dec))) - return updated_query_table - - # Make the cleaned up table - updated_query_table = Table(rows=good_rows, names=query_table.colnames) - del query_table - - # Get the data product list associated with the elements of the table - log.info("Get the product list for all entries in the query table.") - dp_table = Observations.get_product_list(updated_query_table) - del updated_query_table - - # Filter on MVM drizzled products only - suffix = ["DRZ", "DRC"] - log.info("Filter the product list table for only {} filenames.".format(suffix)) - filtered_dp_table = Observations.filter_products(dp_table, - productSubGroupDescription=suffix, - extension="fits") - - if not filtered_dp_table: - log.warning("No MVM drizzle product datasets (DRZ/DRC) found within {} of {}.".format(radius, (str_ra, str_dec))) - return filtered_dp_table - del dp_table - - # Need to filter out any non-hst-skycell entries AGAIN which may have - # crept back into the list via the get_product_list() function. - good_rows = [] - output_table = None - for old_row in filtered_dp_table: - if old_row["obs_id"].startswith("hst_skycell"): - good_rows.append(old_row) - - # Catch the case where no files are found which satisfy the criteria - if len(good_rows) == 0: - log.warning("After filtering datasets there are NO RESULTS within {} of {}!".format(radius, (str_ra, str_dec))) - return output_table - - # Make the output table - output_table = Table(rows=good_rows, names=filtered_dp_table.colnames) - del filtered_dp_table - - # Finally, make sure the entries are unique - final_table = None - final_table = unique(output_table, keys="productFilename") - del output_table - - # Write the table to a file. This allows for further manipulation of - # the information before a list of filenames is distilled from the table. - # Output filename in the form: mvm_query-ra<###>d<####>-dec<##>d<####>__cutout.ecsv - # (e.g., mvm_query-ra84d9208-decs69d1483_71_cutout.ecsv), where radius has been computed from the - # cutout dimensions. - # - # Get the whole number and fractional components of the RA and Dec - ns = "s" if sky_coord.dec.degree < 0.0 else "n" - ra_whole = int(sky_coord.ra.value) - ra_frac = str(sky_coord.ra.value).split(".")[1][0:4] - dec_whole = abs(int(sky_coord.dec.value)) - dec_frac = str(sky_coord.dec.value).split(".")[1][0:4] - log.info("coords2. {} {} {}".format(sky_coord.ra.value, sky_coord.dec.value, dec_frac)) - - query_filename = "mvm_query-ra" + str(ra_whole) + "d" + ra_frac + "-dec" + ns + \ - str(dec_whole) + "d" + dec_frac + "_{:.0f}".format(radius.value) + "_cutout.ecsv" - - log.info("Writing out the MVM product list table to {}.".format(query_filename)) - log.info("Number of entries in table: {}.".format(len(final_table))) - final_table.write(query_filename, format="ascii.ecsv") - - return final_table - - -def mvm_retrieve_files(products, archive=False, clobber=False, log_level=logutil.logging.INFO): - """ - This function retrieves specified files from the archive - unless the file is found - to be locally resident on disk. 
Upon completion, The function returns a list of - filenames available on disk. - - Parameters - ---------- - products : `astropy.table.Table `_ object - A Table of products as returned by the mvm_id_filenames function. - - archive : Boolean, optional - Retain copies of the downloaded files in the astroquery created - sub-directories? Default is "False". - - clobber : Boolean, optional - Download and Overwrite existing files? Default is "False". - - log_level : int, optional - The desired level of verbosity in the log statements displayed on the screen and written to the - .log file. Default value is 20, or 'info'. - - Returns - ------- - local_files : list - List of filenames - - Note: Code here cribbed from retrieve_obsevation in astroquery_utils module. - """ - - # set logging level to user-specified level - log.setLevel(log_level) - - # Determine if the files of interest are already on the local disk. If so, - # remove the filename from the download list. - all_images = [] - all_images = products['productFilename'].tolist() - if not clobber: - rows_to_remove = [] - for row_idx, row in enumerate(products): - fname = row['productFilename'] - if os.path.isfile(fname): - log.info(fname + " already exists. File download skipped.") - rows_to_remove.append(row_idx) - products.remove_rows(rows_to_remove) - - # Only download files as necessary - if products: - # Actual download of products - log.info("Downloading files now...") - manifest = Observations.download_products(products, mrp_only=False) - else: - log.info("There are no files to download as they are all resident on disk.") - - # Manifest has the following columns: "Local Path", "Status", "Message", and "URL" - if not clobber: - for rownum in rows_to_remove[::-1]: - if manifest: - manifest.insert_row(rownum, - vals=[all_images[rownum], "LOCAL", "None", "None"]) - else: - return all_images - - download_dir = None - local_files = [] - for file, file_status in zip(manifest['Local Path'], manifest['Status']): - if file_status != "LOCAL": - # Identify what sub-directory was created by astroquery for the - # download - if download_dir is None: - download_dir = os.path.dirname(os.path.abspath(file)) - # Move or copy downloaded file to current directory - local_file = os.path.abspath(os.path.basename(file)) - if archive: - shutil.copy(file, local_file) - else: - shutil.move(file, local_file) - # Record what files were downloaded and their current location - local_files.append(os.path.basename(local_file)) - else: - local_files.append(file) - if not archive: - # Remove astroquery created sub-directories - shutil.rmtree('mastDownload') - - return local_files - - -def make_the_cut(input_files, sky_coord, cutout_size, output_dir=".", log_level=logutil.logging.INFO, verbose=False): - """ - This function makes the actual cut in the input MVM drizzled filter- and exposure-level FITS - files. As such it is a high-level interface for the - `astrocut.cutouts.fits_cut `_ functionality. - - Parameters - ---------- - input_files : list - List of fits image filenames from which to create cutouts. - - sky_coord : str or `astropy.coordinates.SkyCoord `_ object - The position around which to cutout. It may be specified as a string ("ra dec" in degrees) - or as the appropriate `astropy.coordinates.SkyCoord - `_ object. - - cutout_size : int, array-like, `astropy.units.Quantity `_ - The size of the cutout array. If ``cutout_size`` is a scalar number or a scalar - `astropy.units.Quantity `_, - then a square cutout of ``cutout_size`` will be created. 
- If ``cutout_size`` has two elements, they should be in ``(ny, nx)`` order. Scalar numbers - in ``cutout_size`` are assumed to be in units of arcseconds. - `astropy.units.Quantity `_ objects - must be in angular units. - - output_dir : str - Default value '.'. The directory where the cutout file(s) will be saved. - - log_level : int, optional - The desired level of verbosity in the log statements displayed on the screen and written to the - .log file. Default value is 20, or 'info'. - - verbose : bool - Default False. If True, additional intermediate information is printed for the underlying - `spacetelescope.astrocut `_ utilities. - - Returns - ------- - response : list - Returns a list of all the output filenames. - - - Note: For each input file designated for a cutout, there will be a corresponding output file. - Since both the SCI and WHT extensions of the input files are actually cut, individual fits files - will contain two image extensions, a SCI followed by the WHT. - - While the standard pipeline processing does not produce an MVM exposure-level drizzled - product, it is possible for a user to turn on this capability in the pipeline while performing - custom processing. As such this routine will perform cutouts of the exposure-level drizzled files. - - Each filter-level output filename will be of the form: - hst_cutout_skycell-p-ra<##>d<####>-dec<##>d<####>_instrument_detector_filter[_platescale].fits - Each exposure-level filename will be of the form: - hst_cutout_skycell-p-ra<##>d<####>-dec<##>d<####>_instrument_detector_filter[_platescale]-ipppssoo.fits - - where platescale has the value of "coarse" representing 0.12"/pixel for WFC3/IR, or there - is no platescale value present which is the default and represents a "fine" platescale of 0.04"/pixel. - - """ - - # set logging level to user-specified level - log.setLevel(log_level) - - # Set the values for fits_cut that we are not allowing the user to modify - CORRECT_WCS = False - EXTENSION = [1, 2] # SCI and WHT - OUTPUT_PREFIX = "hst_cutout_skycell-" - MEMORY_ONLY = True # This code will modify the output before it is written. - SINGLE_OUTFILE = False - - # Making sure we have an array of images - if type(input_files) == str: - input_files = [input_files] - - # Retain the fully qualified pathname for each file - input_paths = {} - for files in input_files: - input_paths[os.path.basename(files)] = os.path.dirname(files) - - # If the cutout_size is not an astropy.units.Quantity object, the scalar(s) - # are assumed to be arcseconds. The variable must be cast as a Quantity. - if not isinstance(cutout_size, Quantity): - cutout_size *= u.arcsec - - if not isinstance(sky_coord, SkyCoord): - sky_coord = SkyCoord(sky_coord, unit="deg") - - # Call the cutout workhorse - # SINGLE_OUTFILE = FALSE ==> MULTIPLE FILES: For each file cutout, there is an HDUList - # comprised of a PHDU and one or more EHDUs. The out_HDUList is then a list of HDULists. - # - # Loop over the input list so if there is an exception with a file, the - # remaining files can still be used to generate cutout images. 
- tmp_HDUList = [] - out_HDUList = [] - for infile in input_files: - try: - tmp_HDUList = fits_cut(infile, sky_coord, cutout_size, correct_wcs=CORRECT_WCS, - extension=EXTENSION, single_outfile=SINGLE_OUTFILE, cutout_prefix=OUTPUT_PREFIX, - output_dir=output_dir, memory_only=MEMORY_ONLY, verbose=True) - - # Copy and append the first (and it turns out the only) entry/list in the list - out_HDUList.append(copy.deepcopy(tmp_HDUList[0])) - except Exception as x_cept: - log.error("") - log.error("Exception encountered during the cutout process: {}".format(x_cept)) - log.error("No cutout files were created for file: {}.".format(infile)) - - # hst_cutout_skycell-p-ra<##>d<####>-dec<##>d<####>_instrument_detector_filter[_platescale][-ipppssoo].fits - # Get the whole number and fractional components of the RA and Dec - ra_whole = int(sky_coord.ra.value) - ra_frac = str(sky_coord.ra.value).split(".")[1][0:4] - dec_whole = abs(int(sky_coord.dec.value)) - dec_frac = str(sky_coord.dec.value).split(".")[1][0:4] - ns = "s" if sky_coord.dec.degree < 0.0 else "n" - - filename_list = [] - for HDU in out_HDUList: - - # Update only the image extensions - extlist = HDU[1:] - - # Update the EXTNAME for all of the EHDUs - for index in range(len(extlist)): - input_filename = extlist[index].header["ORIG_FLE"] - tokens = input_filename.split("_") - skycell = tokens[1].split("-")[1] - instr = tokens[2] - detector = tokens[3] - filter = tokens[4] - label_plus = tokens[5] - old_extname= extlist[index].header["O_EXT_NM"].strip().upper() - extlist[index].header["EXTNAME"] = old_extname - - # Determine if the file is WFC3/IR which has both a "fine" (default) and - # "coarse" platescale. - plate_scale = "_coarse" if label_plus.upper().find("COARSE") != -1 else "" - - # Since the multiple output cutout files can also be input to the CutoutsCombiner, - # there is some additional keyword manipulation done in the header. - # - # SCI extensions are followed by WHT extensions - when the WHT extension - # has been updated, it is time to write out the file. - if old_extname == "WHT": - - # Construct an MVM-style output filename with detector and filter - output_filename = OUTPUT_PREFIX + skycell + "-ra" + str(ra_whole) + \ - "d" + ra_frac + "-dec" + ns + str(dec_whole) + "d" + \ - dec_frac + "_" + instr + "_" + detector + "_" + filter + plate_scale + ".fits" - - # Determine if the original file were a filter-level or exposure-level MVM product - # ORIG_FLE filter-level: hst_skycell-p1253x05y09_acs_wfc_f658n_all_drc.fits - # ORIG_FLE filter-level: hst_skycell-p0081x14y15_wfc3_ir_f128n_coarse-all_drz.fits - # ORIG_FLE filter-level: hst_skycell-p0081x14y15_wfc3_ir_f128n_all_drz.fits (fine scale) - # ORIG_FLE exposure-level: hst_skycell-p0081x14y15_wfc3_ir_f128n_coarse-all-ibp505mf_drz.fits - # NOTE: Be careful of the WFC3/IR filenames which can include "coarse". 
- ef_discriminant = label_plus.split("-")[-1] - if ef_discriminant.upper() != "ALL": - product_type="EXPOSURE" - output_filename = output_filename.replace(".fits", "-" + ef_discriminant + ".fits") - else: - product_type="FILTER" - - # Examples of output cutout filenames: - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_uvis_f275w.fits - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_acs_wfc_f814w-jbp505jg.fits - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_ir_f128n_coarse.fits - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_ir_f128n_coarse-ibp505mf.fits - cutout_path = os.path.join(output_dir, output_filename) - - log.info("Cutout FITS filename: {}".format(cutout_path)) - - # Retain some keywords written in the PHDU of the cutout file - # by the astrocut software - ra_obj = HDU[0].header["RA_OBJ"] - dec_obj = HDU[0].header["DEC_OBJ"] - - # Replace the minimal primary header written by the astrocut - # software with the primary header from the corresponding input file, - # so we can retain a lot of information from the observation - HDU[0].header = fits.getheader(os.path.join(input_paths[input_filename], input_filename)) - - # Put the new RA/DEC_OBJ keywords back - HDU[0].header["RA_OBJ"] = (ra_obj, "[deg] right ascension") - HDU[0].header["DEC_OBJ"] = (dec_obj, "[deg] declination") - - # Update PHDU FILENAME keyword with the new filename - HDU[0].header['FILENAME'] = output_filename - - # Insert the new keyword, ORIG_FLE, in the PHDU which is the - # *input* filename. This keyword is also in the EHDUs. - HDU[0].header["ORIG_FLE"] = input_filename - - output_HDUs = fits.HDUList(HDU) - output_HDUs.writeto(cutout_path, overwrite=True) - - filename_list.append(output_filename) - - # Clean up any files left by `˜astrocut.cutouts.fits_cut` - try: - cruft_filenames = glob.glob(output_dir + "/hst_skycell*_astrocut.fits") - if cruft_filenames: - for cf in cruft_filenames: - os.remove(cf) - except Exception as x_cept: - log.warning("") - log.warning("Exception encountered: {}.".format(x_cept)) - log.warning("The following residual files could not be deleted from disk. " \ - "Please delete these files to avoid confusion at your earliest convenience:") - pprint(cruft_filenames) - - return filename_list - - -def mvm_combine(cutout_files, output_dir=".", log_level=logutil.logging.INFO): - """ - This function combines multiple MVM skycell cutout images from the same detector/filter combination - to create a single view of the requested data. All of the functions in this module are designed to - work in conjunction with one another, so the cutout images should be on the user's local disk. This - task is a high-level wrapper for the - `astrocut.cutout_processing.combine - `_ functionality. - - Specifically, this routine will combine filter-level cutouts from multiple skycells, all sharing - the same detector and filter. This routine will also combine exposure-level cutouts from - multiple skycells, all sharing the same detector, filter, and ipppssoo. Images which do not - share a detector and filter with any other image will be ignored. Individual exposures from - a single skycell will also be ignored. - - Parameters - ---------- - cutout_files : list - List of fits image cutout filenames where the cutouts are presumed to have been created - with `~drizzlepac.haputils.hapcut_utils.make_the_cut`. - - output_dir : str - Default value '.' - The directory where the output combined files will be saved. 
- - log_level : int, optional - The desired level of verbosity in the log statements displayed on the screen and written to the - .log file. Default value is 20, or 'info'. - - """ - # def mvm_combine(cutout_files, img_combiner=None, output_dir=".", log_level=logutil.logging.INFO): - img_combiner = None - - # set logging level to user-specified level - log.setLevel(log_level) - - # Make sure the cutout_files are really a list of MULTIPLE filenames - if type(cutout_files) == str or type(cutout_files) == list and len(cutout_files) < 2: - log.error("The 'mvm_combine' function requires a list of MULTIPLE cutout filenames where" \ - " the files were generated by 'make_the_cut'.") - - # Sort the cutout filenames by detector (primary) and filter (secondary) - cutout_files.sort(key = lambda x: (x.split("_")[3], x.split("_")[4])) - - # Report the cutout files submitted for the combination process - log.info("Input cutout files:") - for cf in cutout_files: - log.info("File: {}".format(cf)) - - # Examples of input cutout filenames - # Filter-level - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_uvis_f275w.fits - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_ir_f128n_coarse.fits - # Exposure-level - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_acs_wfc_f814w-jbp505jg.fits - # hst_cutout_skycell-p0081x14y15-ra84d9207-decs69d8516_wfc3_ir_f128n_coarse-ibp505mf.fits - # - # Combined filter-level files will be generated for each detector/filter combination - # Combined exposure-level files will be generated for each detector/filter combination - # where the ipppssoo is the same - # - # Walk the sorted input list and create filter-level and exposure-level dictionaries - filter_dict = {} - exposure_dict = {} - for cfile in cutout_files: - - # Since the filename could be modified, open the file and read the FILENAME keyword - hdu0 = fits.getheader(cfile, ext=0) - cf = hdu0["FILENAME"].replace(".fits", "") - - # Parse to get the important information - tokens = cf.split("_") - instr = tokens[3] - detector = tokens[4] - filter = tokens[5].split("-")[0] - str_tmp = tokens[-1].split("-") - ipppssoo = "" - if len(str_tmp) > 1: - ipppssoo = str_tmp[1] - - # Based upon type of input file, filter-level or exposure-level, populate - # the appropriate dictionary - instr_det_filt_ippp = "" - instr_det_filt = "" - if ipppssoo: - instr_det_filt_ippp = instr + "_" + detector + "_" + filter + "_" + ipppssoo - if instr_det_filt_ippp not in exposure_dict: - exposure_dict[instr_det_filt_ippp] = [cfile] - else: - exposure_dict[instr_det_filt_ippp].append(cfile) - else: - instr_det_filt = instr + "_" + detector + "_" + filter - if instr_det_filt not in filter_dict: - filter_dict[instr_det_filt] = [cfile] - else: - filter_dict[instr_det_filt].append(cfile) - - # FILTER-LEVEL COMBINATION - # For each detector/filter, generate the output filename and perform the combine - log.info("") - log.info("=== Combining filter-level files ===") - __combine_cutouts(filter_dict, type="FILTER", img_combiner=img_combiner, output_dir=output_dir, log_level=log_level) - - # EXPOSURE-LEVEL COMBINATION - log.info("") - log.info("=== Combining exposure-level files ===") - __combine_cutouts(exposure_dict, type="EXPOSURE", img_combiner=img_combiner, output_dir=output_dir, log_level=log_level) - - log.info("Cutout combination is done.") - - -def __combine_cutouts(input_dict, type="FILTER", img_combiner=None, output_dir=".", log_level=logutil.logging.INFO): - """ - This private function performs the actual combine of 
the multiple MVM skycell cutout images. - - Parameters - ---------- - input_dict : dictionary - A dictionary where the key is the instr_detector_filter or instr_detector_filter_ipppssoo string and - the corresponding value is a list of filenames corresponding to the key. - - type : string - A string to indicate whether the input_dict variable is for a filter-level or exposure-level - dictionary - - img_combiner : func - The function to be used to combine the images - - output_dir : str - Default value '.'. The directory to save the cutout file(s) to. - - log_level : int, optional - The desired level of verbosity in the log statements displayed on the screen and written to the - .log file. Default value is 20, or 'info'. - - """ - - # set logging level to user-specified level - log.setLevel(log_level) - - # Output prefix - OUTPUT_PREFIX = "hst_combined_skycells-" - - for key, file_list in input_dict.items(): - - # If there are multiple files to combine, then do it - if len(file_list) > 1: - - # Construct the combined filename based on the first file in the list - # Example: hst_combined_skycells-ra84d9207-decs69d8516_wfc3_uvis_f275w.fits - filename = fits.getheader(file_list[0], ext=0)['FILENAME'] - fname = filename.replace(".fits", "") - sky_tokens = fname.split("_")[2].split("-") - skycell = sky_tokens[1][1:5] - ra = sky_tokens[2] - dec = sky_tokens[3] - - instr = key.split("_")[0] - detector = key.split("_")[1] - filter = key.split("_")[2] - if type.upper() == "EXPOSURE": - exposure = key.split("_")[3] - output_filename = os.path.join(output_dir, OUTPUT_PREFIX + ra + "-" + dec + "_" + \ - instr + "_" + detector + "_" + filter + "_" + exposure + ".fits") - else: - output_filename = os.path.join(output_dir, OUTPUT_PREFIX + ra + "-" + dec + "_" + \ - instr + "_" + detector + "_" + filter + ".fits") - - - # Combine the SCI and then the WHT extensions in the specified files - log.info("Combining the SCI and then the WHT extensions of the input cutout files.") - try: - combined_cutout = astrocut.CutoutsCombiner(file_list, img_combiner=img_combiner).combine(output_file=output_filename, \ - memory_only=True) - except Exception as x_cept: - log.warning("The cutout combine was not successful for files, {}, due to {}.".format(file_list, x_cept)) - log.warning("Processing continuuing on next possible set of data.") - continue - - log.info("The combined output filename is {}.\n".format(output_filename)) - - # Add the FILENAME keyword to the PHDU of the in-memory output - if output_filename.startswith("./"): - output_filename = output_filename.replace("./", "") - combined_cutout[0].header["FILENAME"] = output_filename - - # Update the EXTNAMEs of the EHDUs - combined_cutout[1].header["EXTNAME"] = "SCI" - combined_cutout[2].header["EXTNAME"] = "WHT" - - # Write out the file - combined_cutout.writeto(output_filename, overwrite=True) - - # Only a single file - else: - log.warning("There is only one file for this detector/filter[/ipppssoo] combination, so there" \ - " is nothing to combine.") - log.warning("File {} will be ignored for combination purposes.\n".format(file_list)) - diff --git a/drizzlepac/haputils/poller_utils.py b/drizzlepac/haputils/poller_utils.py index a9b0f43ce..c3fa6df4e 100755 --- a/drizzlepac/haputils/poller_utils.py +++ b/drizzlepac/haputils/poller_utils.py @@ -25,7 +25,6 @@ from drizzlepac.haputils.product import ExposureProduct, FilterProduct, TotalProduct, GrismExposureProduct from drizzlepac.haputils.product import SkyCellProduct, SkyCellExposure from . import analyze -from . 
import astroquery_utils as aqutils
 from . import processing_utils
 from . import cell_utils
 
@@ -914,6 +913,11 @@ def build_poller_table(input, log_level, all_mvm_exposures=[], poller_type='svm'
     poller_table : Table
         Astropy table object with the same columns as a poller file.
 
+    ### Need to eliminate the use of astroquery. As such, the files to be processed MUST be
+    ### available on disk for processing. The user is responsible for making the data available.
+    ### The poller file must contain individual full image filenames.
+    ### Determine if an error message must be added. MDD
+
     """
     log.setLevel(log_level)
 
@@ -1036,13 +1040,7 @@ def build_poller_table(input, log_level, all_mvm_exposures=[], poller_type='svm'
         for filename in filenames:
             # Look for dataset in local directory.
             if "asn" in filename or not os.path.exists(filename):
-                # This retrieval will NOT overwrite any ASN members already on local disk
-                # Return value will still be list of all members
-                files = aqutils.retrieve_observation([filename[:9]], suffix=['FLC'], clobber=False)
-                if len(files) == 0:
-                    log.error("Filename {} not found in archive!!".format(filename))
-                    log.error("Please provide ASN filename instead!")
-                    raise ValueError
+                raise FileNotFoundError(f"File {filename} not found in working directory.")
             else:
                 files = [filename]
             datasets += files
diff --git a/pyproject.toml b/pyproject.toml
index 645873a23..176fa105e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,7 +39,6 @@ dependencies = [
     'bokeh',
     'pandas',
     'spherical_geometry>=1.2.22',
-    'astroquery>=0.4',
     'astrocut',
     'photutils>1.5.0',
     'lxml',
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 188b3bcee..a3a327812 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -2,5 +2,4 @@ git+https://github.com/astropy/photutils.git#egg=photutils
 git+https://github.com/spacetelescope/stsci.tools.git#egg=stsci.tools
 git+https://github.com/astropy/astropy.git#egg=astropy
 git+https://github.com/spacetelescope/stwcs.git#egg=stwcs
-git+https://github.com/astropy/astroquery.git#egg=astroquery
 numpy>=0.0.dev0
diff --git a/tests/hap/base_classes.py b/tests/hap/base_classes.py
index 44ab8f03f..9b4088d3f 100644
--- a/tests/hap/base_classes.py
+++ b/tests/hap/base_classes.py
@@ -8,8 +8,6 @@
 from ci_watson.artifactory_helpers import get_bigdata
 from ci_watson.artifactory_helpers import compare_outputs
 
-from drizzlepac.haputils.astroquery_utils import retrieve_observation
-
 # Base classes for actual tests.
 # NOTE: Named in a way so pytest will not pick them up here.
 @pytest.mark.bigdata
@@ -98,16 +96,13 @@ def get_data(self, *args, **kwargs):
         This will return a list of all the files downloaded with the full
         path to the local copy of the file.
         """
-        if len(args[0]) == 9:  # Only a rootname provided
-            local_files = retrieve_observation(args[0])
-        else:
-            # If user has specified action for no_copy, apply it with
-            # default behavior being whatever was defined in the base class.
+ docopy = kwargs.get('docopy', self.docopy) + local_files = get_bigdata(*self.get_input_path(), + *args, + docopy=docopy) + local_files = [local_files] return local_files diff --git a/tests/hap/template_svm_demo.py b/tests/hap/template_svm_demo.py index 8a8c5babc..dd48d5d07 100644 --- a/tests/hap/template_svm_demo.py +++ b/tests/hap/template_svm_demo.py @@ -6,11 +6,12 @@ import os import pytest -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) + """ template_svm_demo.py @@ -24,6 +25,7 @@ originating directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_e28_1u_input.out" @@ -69,10 +71,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_align.py b/tests/hap/test_align.py index 8cc161f04..0d8381ecb 100644 --- a/tests/hap/test_align.py +++ b/tests/hap/test_align.py @@ -8,6 +8,8 @@ from stsci.tools import logutil +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) + log = logutil.create_logger('test_align', level=logutil.logging.INFO, stream=sys.stdout) # Nominal acceptable RMS limit for a good solution (IMPROVE THIS) diff --git a/tests/hap/test_alignpipe_randomlist.py b/tests/hap/test_alignpipe_randomlist.py index 39dc138ec..c3a29a2c2 100644 --- a/tests/hap/test_alignpipe_randomlist.py +++ b/tests/hap/test_alignpipe_randomlist.py @@ -17,9 +17,9 @@ from stsci.tools import logutil from astropy.io import fits -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runastrodriz +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) log = logutil.create_logger('test_alignpipe_randomlist', level=logutil.logging.INFO, stream=sys.stdout) @@ -245,8 +245,6 @@ def check_disk_get_data(input_list, **pars): list of full filenames """ - reload(aqutils) - empty_list = [] retrieve_list = [] # Actual files retrieved via astroquery and resident on disk candidate_list = [] # File names gathered from *_asn.fits file @@ -306,33 +304,7 @@ def check_disk_get_data(input_list, **pars): suffix)) return (empty_list) - # Input is an ipppssoot (association or singleton), nine characters by definition. - # This "else" block actually downloads the data specified as ipppssoot. - elif len(input_item) == 9: - try: - if input_item not in ipppssoot_list: - # An ipppssoot of an individual file which is part of an association cannot be - # retrieved from MAST - log.info("Collect data: {} Suffix: {}".format(input_item, suffix_to_retrieve)) - for filetype in suffix_to_retrieve: - retrieve_list += aqutils.retrieve_observation(input_item, suffix=filetype, - product_type='pipeline') - log.info("Collected data: {}".format(retrieve_list)) - - # If the retrieved list is not empty, add filename(s) to the total_input_list. - # Also, update the ipppssoot_list so we do not try to download the data again. 
Need - # to do this since retrieve_list can be empty because (1) data cannot be acquired (error) - # or (2) data is already on disk (ok). - if retrieve_list: - total_input_list += retrieve_list - ipppssoot_list.append(input_item) - else: - log.error('File {} cannot be retrieved from MAST.'.format(input_item)) - return(empty_list) - except Exception: - log.info("Exception in check_disk_get_data") - exc_type, exc_value, exc_tb = sys.exc_info() - traceback.print_exception(exc_type, exc_value, exc_tb, file=sys.stdout) + # May need to add code here to accommodate loss of astroquery_utils MDD # Only the retrieve_list files via astroquery have been put into the total_input_list thus far. # Now check candidate_list to detect or acquire the requested files from MAST via astroquery. diff --git a/tests/hap/test_apriori.py b/tests/hap/test_apriori.py index e9779e215..afdf7517a 100644 --- a/tests/hap/test_apriori.py +++ b/tests/hap/test_apriori.py @@ -9,6 +9,7 @@ from drizzlepac.haputils import testutils from ..resources import BaseACS, BaseWFC3 +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) def compare_apriori(dataset): diff --git a/tests/hap/test_pipeline.py b/tests/hap/test_pipeline.py index 33e4b4d04..8c75bfea9 100644 --- a/tests/hap/test_pipeline.py +++ b/tests/hap/test_pipeline.py @@ -9,9 +9,9 @@ from ci_watson.artifactory_helpers import get_bigdata from ci_watson.hst_helpers import download_crds, ref_from_image -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runastrodriz from astropy.io import fits +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) class BasePipeline: @@ -119,16 +119,18 @@ class BaseWFC3Pipeline(BasePipeline): class TestSingleton(BaseWFC3Pipeline): @pytest.mark.parametrize( - 'dataset_names', ['iaaua1n4q'] + 'dataset_names', ['iaaua1n4q_raw.fits', 'iaaua1n4q_flt.fits', 'iaaua1n4q_flc.fits'] ) def test_astrometric_singleton(self, dataset_names): """ Tests pipeline-style processing of a singleton exposure using runastrodriz. """ # Get sample data through astroquery + """ flcfile = aqutils.retrieve_observation(dataset_names, suffix=['FLC'])[0] fltfile = aqutils.retrieve_observation(dataset_names, suffix=['FLT'])[0] rawfile = aqutils.retrieve_observation(dataset_names, suffix=['RAW'])[0] + """ # Retrieve reference files for these as well self.get_input_file('', fltfile, docopy=False) diff --git a/tests/hap/test_randomlist.py b/tests/hap/test_randomlist.py index c04b6ed7e..4b97abdc7 100644 --- a/tests/hap/test_randomlist.py +++ b/tests/hap/test_randomlist.py @@ -13,6 +13,7 @@ from stsci.tools import logutil +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) log = logutil.create_logger('test_randomlist', level=logutil.logging.INFO, stream=sys.stdout) diff --git a/tests/hap/test_run_svmpoller.py b/tests/hap/test_run_svmpoller.py index 4743270ba..f9dc77910 100644 --- a/tests/hap/test_run_svmpoller.py +++ b/tests/hap/test_run_svmpoller.py @@ -14,8 +14,8 @@ from astropy.io import ascii from drizzlepac import runsinglehap from astropy.table import Table -from drizzlepac.haputils import astroquery_utils as aqutils +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) def pytest_generate_tests(metafunc): """Get the command line options.""" @@ -120,10 +120,12 @@ def test_run_svmpoller(tmpdir, dataset): # avoid downloading too many images which are not needed for processing. 
flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_canary.py b/tests/hap/test_svm_canary.py index a646e9d9a..330360353 100644 --- a/tests/hap/test_svm_canary.py +++ b/tests/hap/test_svm_canary.py @@ -7,7 +7,6 @@ import pytest import numpy as np -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path @@ -23,6 +22,7 @@ * The POLLER_FILE exists in the tests/hap directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_hrc_sbc_input.out" @@ -68,10 +68,12 @@ def gather_data_for_processing(tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_hrcsbc.py b/tests/hap/test_svm_hrcsbc.py index 13fc35f01..2517ef018 100644 --- a/tests/hap/test_svm_hrcsbc.py +++ b/tests/hap/test_svm_hrcsbc.py @@ -7,7 +7,6 @@ import pytest import numpy as np -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path @@ -23,6 +22,7 @@ * The POLLER_FILE exists in the tests/hap directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_hrc_sbc_input.out" @@ -76,10 +76,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_ibqk07.py b/tests/hap/test_svm_ibqk07.py index 757153ccc..c42a86131 100644 --- a/tests/hap/test_svm_ibqk07.py +++ b/tests/hap/test_svm_ibqk07.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from astropy.table import Table @@ -27,6 +26,7 @@ originating directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) POLLER_FILE = "wfc3_bqk_07_input.out" WCS_SUB_NAME = "HSC30" @@ -88,10 +88,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. 
flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_ibyt50.py b/tests/hap/test_svm_ibyt50.py index 608249652..a61aaebd4 100644 --- a/tests/hap/test_svm_ibyt50.py +++ b/tests/hap/test_svm_ibyt50.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from astropy.table import Table @@ -27,6 +26,7 @@ originating directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) POLLER_FILE = "wfc3_byt_50_input.out" WCS_UVIS_SUB_NAME = "FIT_SVM_GAIA" @@ -81,10 +81,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_j97e06.py b/tests/hap/test_svm_j97e06.py index 2c91aa4e9..519509c39 100644 --- a/tests/hap/test_svm_j97e06.py +++ b/tests/hap/test_svm_j97e06.py @@ -8,7 +8,6 @@ import numpy as np import pytest -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from astropy.table import Table @@ -27,6 +26,7 @@ originating directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) # Expectation values used directly or indirectly for the test assert statements WCS_SUB_NAME = "IDC_4BB1536OJ" @@ -91,10 +91,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_je281u.py b/tests/hap/test_svm_je281u.py index 19b4effcb..74858c862 100644 --- a/tests/hap/test_svm_je281u.py +++ b/tests/hap/test_svm_je281u.py @@ -6,7 +6,6 @@ import os import pytest -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path @@ -24,6 +23,7 @@ originating directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_e28_1u_input.out" @@ -69,10 +69,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. 
flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) diff --git a/tests/hap/test_svm_wfc3ir.py b/tests/hap/test_svm_wfc3ir.py index 77d1598b9..a663bc9ab 100644 --- a/tests/hap/test_svm_wfc3ir.py +++ b/tests/hap/test_svm_wfc3ir.py @@ -6,7 +6,6 @@ import pytest import numpy as np -from drizzlepac.haputils import astroquery_utils as aqutils from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path @@ -22,6 +21,7 @@ * The POLLER_FILE exists in the tests/hap directory. """ +pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM" POLLER_FILE = "wfc3_ir_ib6807_input.out" @@ -72,10 +72,12 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): # avoid downloading too many images which are not needed for processing. flcfiles = [] fltfiles = [] + """ if flc_flag: flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") if flt_flag: fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") + """ flcfiles.extend(fltfiles) From ad994dd09edf1f765ece7837df268fc27ace52da Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Wed, 28 Jun 2023 17:32:00 -0400 Subject: [PATCH 02/15] Modified tests to run using Artifactory or still Skip. --- tests/hap/template_svm_demo.py | 59 ++-------- tests/hap/test_align.py | 3 +- tests/hap/test_alignpipe_randomlist.py | 7 +- tests/hap/test_pipeline.py | 22 ++-- tests/hap/test_randomlist.py | 8 +- tests/hap/test_run_svmpoller.py | 58 ++------- tests/hap/test_svm_canary.py | 157 ------------------------- tests/hap/test_svm_hrcsbc.py | 53 +-------- tests/hap/test_svm_ibqk07.py | 51 +------- tests/hap/test_svm_ibyt50.py | 51 +------- tests/hap/test_svm_j97e06.py | 51 +------- tests/hap/test_svm_je281u.py | 51 +------- tests/hap/test_svm_wfc3ir.py | 52 +------- 13 files changed, 79 insertions(+), 544 deletions(-) delete mode 100644 tests/hap/test_svm_canary.py diff --git a/tests/hap/template_svm_demo.py b/tests/hap/template_svm_demo.py index dd48d5d07..48f336b11 100644 --- a/tests/hap/template_svm_demo.py +++ b/tests/hap/template_svm_demo.py @@ -9,23 +9,21 @@ from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path - -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) +from ci_watson.artifactory_helpers import get_bigdata """ template_svm_demo.py This test file can be executed in the following manner: - $ pytest -s --basetemp=/internal/hladata/yourUniqueDirectoryHere template_svm.py >& template_svm.log & - $ tail -f template_svm.log + $ pytest -s --basetemp=/internal/hladata/yourUniqueDirectoryHere template_svm_demo.py >& template_svm_demo.log & + $ tail -f template_svm_demo.log * Note: When running this test, the `--basetemp` directory should be set to a unique existing directory to avoid deleting previous test output. * The POLLER_FILE exists in the tests/hap directory. - * If running manually with `--basetemp`, the template_svm.log file will still be written to the + * If running manually with `--basetemp`, the template_svm_demo.log file will still be written to the originating directory. 
""" -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_e28_1u_input.out" @@ -48,52 +46,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. - flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. 
Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_align.py b/tests/hap/test_align.py index 0d8381ecb..edfe6c4eb 100644 --- a/tests/hap/test_align.py +++ b/tests/hap/test_align.py @@ -8,8 +8,6 @@ from stsci.tools import logutil -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) - log = logutil.create_logger('test_align', level=logutil.logging.INFO, stream=sys.stdout) # Nominal acceptable RMS limit for a good solution (IMPROVE THIS) @@ -244,6 +242,7 @@ def test_align_fail_single_visit(self): assert 0.0 < total_rms <= RMS_LIMIT + pytest.skip("Skipping all tests using astroquery.", allow_module_level=True) def test_astroquery(self): """Verify that new astroquery interface will work""" diff --git a/tests/hap/test_alignpipe_randomlist.py b/tests/hap/test_alignpipe_randomlist.py index c3a29a2c2..0d1f008b8 100644 --- a/tests/hap/test_alignpipe_randomlist.py +++ b/tests/hap/test_alignpipe_randomlist.py @@ -19,7 +19,12 @@ from drizzlepac import runastrodriz -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) +# This file will NOT run as-is. Code has to be modified to accommodate properly +# files which will actually need to be processed. This test is probably obsolete and +# should be deprecated as it was used to find random datasets and try to see how +# successful SVM processing could be. The results were used for statistics to +# determine if HAP were a project that could succeed. +pytest.skip("Skipping all tests where files to be processed are not known in advance.", allow_module_level=True) log = logutil.create_logger('test_alignpipe_randomlist', level=logutil.logging.INFO, stream=sys.stdout) diff --git a/tests/hap/test_pipeline.py b/tests/hap/test_pipeline.py index 8c75bfea9..797f4e4eb 100644 --- a/tests/hap/test_pipeline.py +++ b/tests/hap/test_pipeline.py @@ -11,7 +11,6 @@ from drizzlepac import runastrodriz from astropy.io import fits -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) class BasePipeline: @@ -88,8 +87,8 @@ def get_input_file(self, *args, refsep='$', docopy=True): The associated CRDS reference files in ``refstr`` are also downloaded, if necessary. """ - # filename = self.get_data(*args, docopy=docopy) - filename = args[1] + filename = self.get_data(*args, docopy=docopy) + #filename = args[1] ref_files = ref_from_image(filename, ['IDCTAB', 'OFFTAB', 'NPOLFILE', 'D2IMFILE', 'DGEOFILE', 'MDRIZTAB']) print("Looking for REF_FILES: {}".format(ref_files)) @@ -118,22 +117,29 @@ class BaseWFC3Pipeline(BasePipeline): class TestSingleton(BaseWFC3Pipeline): - @pytest.mark.parametrize( - 'dataset_names', ['iaaua1n4q_raw.fits', 'iaaua1n4q_flt.fits', 'iaaua1n4q_flc.fits'] - ) + #@pytest.mark.parametrize( + # 'dataset_names', ['iaaua1n4q_raw.fits', 'iaaua1n4q_flt.fits', 'iaaua1n4q_flc.fits'] + #) - def test_astrometric_singleton(self, dataset_names): + def test_astrometric_singleton(self): + #def test_astrometric_singleton(self, dataset_names): """ Tests pipeline-style processing of a singleton exposure using runastrodriz. 
""" # Get sample data through astroquery + rawfile = [os.path.basename(self.get_input_file('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_raw.fits'))] + flcfile = [os.path.basename(self.get_input_file('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_flc.fits'))] + fltfile = [os.path.basename(self.get_input_file('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_flt.fits'))] """ + rawfile = [os.path.basename(self.get_data('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_raw.fits'))] + flcfile = [os.path.basename(self.get_data('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_flc.fits'))] + fltfile = [os.path.basename(self.get_data('drizzlepac', 'dev', 'wfc3', 'input', 'iaaua1n4q_flt.fits'))] flcfile = aqutils.retrieve_observation(dataset_names, suffix=['FLC'])[0] fltfile = aqutils.retrieve_observation(dataset_names, suffix=['FLT'])[0] rawfile = aqutils.retrieve_observation(dataset_names, suffix=['RAW'])[0] """ # Retrieve reference files for these as well - self.get_input_file('', fltfile, docopy=False) + #self.get_input_file('', fltfile, docopy=False) # Insure environment variables are set for full processing os.environ['ASTROMETRY_STEP_CONTROL'] = 'on' diff --git a/tests/hap/test_randomlist.py b/tests/hap/test_randomlist.py index 4b97abdc7..357556764 100644 --- a/tests/hap/test_randomlist.py +++ b/tests/hap/test_randomlist.py @@ -13,7 +13,13 @@ from stsci.tools import logutil -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) +# This file will NOT run as-is. Code has to be modified to accommodate properly +# files which will actually need to be processed. This test is probably obsolete and +# should be deprecated as it was used to find random datasets and try to see how +# successful SVM processing could be. The results were used for statistics to +# determine if HAP were a project that could succeed. +pytest.skip("Skipping all tests where files to be processed are not known in advance.", allow_module_level=True) + log = logutil.create_logger('test_randomlist', level=logutil.logging.INFO, stream=sys.stdout) diff --git a/tests/hap/test_run_svmpoller.py b/tests/hap/test_run_svmpoller.py index f9dc77910..5a19a6c3b 100644 --- a/tests/hap/test_run_svmpoller.py +++ b/tests/hap/test_run_svmpoller.py @@ -14,8 +14,13 @@ from astropy.io import ascii from drizzlepac import runsinglehap from astropy.table import Table +from ci_watson.artifactory_helpers import get_bigdata -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) +# This file will NOT run as-is. Code has to be modified to accommodate properly +# files which will actually need to be processed. In particular, the test_run_svmpoller() +# will have to be updated. It is not clear this test is useful any longer and perhaps +# it should be deprecated. +pytest.skip("Skipping all tests where files to be processed are not known in advance.", allow_module_level=True) def pytest_generate_tests(metafunc): """Get the command line options.""" @@ -97,52 +102,11 @@ def test_run_svmpoller(tmpdir, dataset): filenames = list(table[filename_column]) print("\nread_csv_for_filenames. Filesnames from poller: {}".format(filenames)) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. 
- for fn in filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. - flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(filenames).symmetric_difference(set(flcfiles)) - - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in filenames] + + files_to_process = filenames # Run the SVM processing path = os.path.join(os.path.dirname(__file__), dataset) diff --git a/tests/hap/test_svm_canary.py b/tests/hap/test_svm_canary.py deleted file mode 100644 index 330360353..000000000 --- a/tests/hap/test_svm_canary.py +++ /dev/null @@ -1,157 +0,0 @@ -""" This module tests full pipeline SVM processing as a demonstration template. - -""" -import datetime -import glob -import os -import pytest -import numpy as np - -from drizzlepac import runsinglehap -from astropy.io import fits, ascii -from pathlib import Path - -""" - test_svm_demo.py - - This test file can be executed in the following manner: - $ pytest -s --basetemp=/internal/hladata/yourUniqueDirectoryHere test_svm.py >& test_svm.log & - $ tail -f test_svm.log - * Note: When running this test, the `--basetemp` directory should be set to a unique - existing directory to avoid deleting previous test output. - * The POLLER_FILE exists in the tests/hap directory. - -""" -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) - -WCS_SUB_NAME = "FIT_SVM_GAIA" -POLLER_FILE = "acs_hrc_sbc_input.out" - -def read_csv_for_filenames(): - # Read the CSV poller file residing in the tests directory to extract the individual visit FLT/FLC filenames - path = os.path.join(os.path.dirname(__file__), POLLER_FILE) - table = ascii.read(path, format="no_header") - filename_column = table.colnames[0] - filenames = list(table[filename_column]) - print("\nread_csv_for_filenames. 
Filesnames from poller: {}".format(filenames)) - - return filenames - - -def gather_data_for_processing(tmp_path_factory): - # create working directory specified for the test - curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) - os.chdir(curdir) - - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - filenames = read_csv_for_filenames() - - for fn in filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. - flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process = set(filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") - - print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) - - return list(files_to_process) - - -def gather_output_data(manifest_filename): - # Determine the filenames of all the output files from the manifest - print(f"\nManifest Filename: {manifest_filename}") - files = [] - with open(manifest_filename, 'r') as fout: - for line in fout.readlines(): - files.append(line.rstrip('\n')) - print("\ngather_output_data. Output data files: {}".format(files)) - - return files - - -def construct_manifest_filename(filenames): - # Construct the output manifest filename from input file keywords - inst = fits.getval(filenames[0], "INSTRUME", ext=0).lower() - root = fits.getval(filenames[0], "ROOTNAME", ext=0).lower() - tokens_tuple = (inst, root[1:4], root[4:6], "manifest.txt") - manifest_filename = "_".join(tokens_tuple) - print("\nconstruct_manifest_filename. 
Manifest filename: {}".format(manifest_filename)) - - return manifest_filename - - -def test_driver(tmp_path_factory): - # Act: Process the input data by executing runsinglehap - time consuming activity - - current_dt = datetime.datetime.now() - print(str(current_dt)) - - # Read the "poller file" and download the input files, as necessary - input_names = gather_data_for_processing(tmp_path_factory) - - # Construct the manifest filename for later - manifest_filename = construct_manifest_filename(input_names) - - # Run the SVM processing - path = os.path.join(os.path.dirname(__file__), POLLER_FILE) - try: - status = runsinglehap.perform(path, log_level="debug") - - output_files = gather_output_data(manifest_filename) - - # Check the output primary WCSNAME includes FIT_SVM_GAIA as part of the string value - tdp_files = [files for files in output_files if - files.lower().find("total") > -1 and files.lower().endswith(".fits")] - - for tdp in tdp_files: - wcsname = fits.getval(tdp, "WCSNAME", ext=1).upper() - print("\ntest_svm_wcs. WCSNAME: {} Output file: {}".format(wcsname, tdp)) - assert WCS_SUB_NAME in wcsname, f"WCSNAME is not as expected for file {tdp}." - - # Catch anything that happens and report it. This is meant to catch unexpected errors and - # generate sufficient output exception information so algorithmic problems can be addressed. - except Exception as except_details: - print(except_details) - pytest.fail("\nsvm_setup. Exception Visit: {}\n", path) - - current_dt = datetime.datetime.now() - print(str(current_dt)) diff --git a/tests/hap/test_svm_hrcsbc.py b/tests/hap/test_svm_hrcsbc.py index 2517ef018..2a5266e5e 100644 --- a/tests/hap/test_svm_hrcsbc.py +++ b/tests/hap/test_svm_hrcsbc.py @@ -10,9 +10,10 @@ from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ - test_svm_demo.py + test_svm.py This test file can be executed in the following manner: $ pytest -s --basetemp=/internal/hladata/yourUniqueDirectoryHere test_svm.py >& test_svm.log & @@ -22,7 +23,6 @@ * The POLLER_FILE exists in the tests/hap directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_hrc_sbc_input.out" @@ -53,52 +53,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process = set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_svm_ibqk07.py b/tests/hap/test_svm_ibqk07.py index c42a86131..97c86c2b7 100644 --- a/tests/hap/test_svm_ibqk07.py +++ b/tests/hap/test_svm_ibqk07.py @@ -12,6 +12,7 @@ from astropy.io import fits, ascii from astropy.table import Table from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ test_svm_ibqk07.py @@ -26,7 +27,6 @@ originating directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) POLLER_FILE = "wfc3_bqk_07_input.out" WCS_SUB_NAME = "HSC30" @@ -65,52 +65,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_svm_ibyt50.py b/tests/hap/test_svm_ibyt50.py index a61aaebd4..6fb0383a8 100644 --- a/tests/hap/test_svm_ibyt50.py +++ b/tests/hap/test_svm_ibyt50.py @@ -12,6 +12,7 @@ from astropy.io import fits, ascii from astropy.table import Table from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ test_svm_ibyt50.py @@ -26,7 +27,6 @@ originating directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) POLLER_FILE = "wfc3_byt_50_input.out" WCS_UVIS_SUB_NAME = "FIT_SVM_GAIA" @@ -58,52 +58,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'wfc3', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_svm_j97e06.py b/tests/hap/test_svm_j97e06.py index 519509c39..655387268 100644 --- a/tests/hap/test_svm_j97e06.py +++ b/tests/hap/test_svm_j97e06.py @@ -12,6 +12,7 @@ from astropy.io import fits, ascii from astropy.table import Table from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ test_svm_j97e06.py @@ -26,7 +27,6 @@ originating directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) # Expectation values used directly or indirectly for the test assert statements WCS_SUB_NAME = "IDC_4BB1536OJ" @@ -68,52 +68,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_svm_je281u.py b/tests/hap/test_svm_je281u.py index 74858c862..cd2d046a9 100644 --- a/tests/hap/test_svm_je281u.py +++ b/tests/hap/test_svm_je281u.py @@ -9,6 +9,7 @@ from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ test_svm_je281u.py @@ -23,7 +24,6 @@ originating directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM_GAIA" POLLER_FILE = "acs_e28_1u_input.out" @@ -46,52 +46,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process= set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) return files_to_process diff --git a/tests/hap/test_svm_wfc3ir.py b/tests/hap/test_svm_wfc3ir.py index a663bc9ab..c51d294bc 100644 --- a/tests/hap/test_svm_wfc3ir.py +++ b/tests/hap/test_svm_wfc3ir.py @@ -9,6 +9,7 @@ from drizzlepac import runsinglehap from astropy.io import fits, ascii from pathlib import Path +from ci_watson.artifactory_helpers import get_bigdata """ test_svm_demo.py @@ -21,7 +22,6 @@ * The POLLER_FILE exists in the tests/hap directory. """ -pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True) WCS_SUB_NAME = "FIT_SVM" POLLER_FILE = "wfc3_ir_ib6807_input.out" @@ -49,51 +49,11 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): curdir = tmp_path_factory.mktemp(os.path.basename(__file__)) os.chdir(curdir) - # Establish FLC/FLT lists and obtain the requested data - flc_flag = "" - flt_flag = "" - # In order to obtain individual FLC or FLT images from MAST (if the files are not reside on disk) which - # may be part of an ASN, use only IPPPSS with a wildcard. The unwanted images have to be removed - # after-the-fact. - for fn in read_csv_for_filenames: - if fn.lower().endswith("flc.fits") and flc_flag == "": - flc_flag = fn[0:6] + "*" - elif fn.lower().endswith("flt.fits") and flt_flag == "": - flt_flag = fn[0:6] + "*" - - # If both flags have been set, then break out the loop early. It may be - # that all files have to be checked which means the for loop continues - # until its natural completion. - if flc_flag and flt_flag: - break - - # Get test data through astroquery - only retrieve the pipeline processed FLC and/or FLT files - # (e.g., j*_flc.fits) as necessary. The logic here and the above for loop is an attempt to - # avoid downloading too many images which are not needed for processing. 
- flcfiles = [] - fltfiles = [] - """ - if flc_flag: - flcfiles = aqutils.retrieve_observation(flc_flag, suffix=["FLC"], product_type="pipeline") - if flt_flag: - fltfiles = aqutils.retrieve_observation(flt_flag, suffix=["FLT"], product_type="pipeline") - """ - - flcfiles.extend(fltfiles) - - # Keep only the files which exist in BOTH lists for processing - files_to_process = set(read_csv_for_filenames).intersection(set(flcfiles)) - - # Identify unwanted files from the download list and remove from disk - files_to_remove = set(read_csv_for_filenames).symmetric_difference(set(flcfiles)) - try: - for ftr in files_to_remove: - os.remove(ftr) - except Exception as x_cept: - print("") - print("Exception encountered: {}.".format(x_cept)) - print("The file {} could not be deleted from disk. ".format(ftr)) - print("Remove files which are not used for processing from disk manually.") + # Get the data from Artifactory + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + for i in read_csv_for_filenames] + + files_to_process = read_csv_for_filenames print("\ngather_data_for_processing. Gathered data: {}".format(files_to_process)) From 079639690dc351b7827bc39140a86e8700cc2ed0 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Thu, 29 Jun 2023 08:00:38 -0400 Subject: [PATCH 03/15] Fix typo acs->wfc3. --- tests/hap/test_svm_wfc3ir.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hap/test_svm_wfc3ir.py b/tests/hap/test_svm_wfc3ir.py index c51d294bc..0d4d8c3d9 100644 --- a/tests/hap/test_svm_wfc3ir.py +++ b/tests/hap/test_svm_wfc3ir.py @@ -50,7 +50,7 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): os.chdir(curdir) # Get the data from Artifactory - inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'wfc3', 'input', i)) for i in read_csv_for_filenames] files_to_process = read_csv_for_filenames From a5853cf68c72053f8dbbf454aa0e16d39c376e55 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Thu, 29 Jun 2023 08:06:37 -0400 Subject: [PATCH 04/15] Yet another typo acs->wfc3. --- tests/hap/test_svm_ibqk07.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/hap/test_svm_ibqk07.py b/tests/hap/test_svm_ibqk07.py index 97c86c2b7..7cdd75544 100644 --- a/tests/hap/test_svm_ibqk07.py +++ b/tests/hap/test_svm_ibqk07.py @@ -66,7 +66,7 @@ def gather_data_for_processing(read_csv_for_filenames, tmp_path_factory): os.chdir(curdir) # Get the data from Artifactory - inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'acs', 'input', i)) + inputs = [os.path.basename(get_bigdata('drizzlepac', 'dev', 'wfc3', 'input', i)) for i in read_csv_for_filenames] files_to_process = read_csv_for_filenames From 7e255ace8785f0916bdb127e01fdece468ba8a70 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Thu, 29 Jun 2023 08:58:15 -0400 Subject: [PATCH 05/15] Skip test until properly handling replacement of astroquery. --- tests/hap/test_pipeline.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/hap/test_pipeline.py b/tests/hap/test_pipeline.py index 797f4e4eb..885eec4d6 100644 --- a/tests/hap/test_pipeline.py +++ b/tests/hap/test_pipeline.py @@ -12,6 +12,10 @@ from drizzlepac import runastrodriz from astropy.io import fits +# This file will NOT run as-is. Code has to be modified to accommodate properly +# files which will actually need to be processed. 
+pytest.skip("Skipping all tests not updated to eliminate the use of deprecated software.", allow_module_level=True) + class BasePipeline: prevdir = os.getcwd() From 7cf2293cf30c686d910230f36fc7f6381b107edd Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Thu, 29 Jun 2023 13:51:19 -0400 Subject: [PATCH 06/15] Added CHANGLOG message checked align.py --- CHANGELOG.rst | 4 ++++ drizzlepac/align.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index d0907aa56..ca08e74c5 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,10 @@ number of the code change for that issue. These PRs can be viewed at: 3.6.1rc0 (unreleased) ===================== +- Removed all use of the AstroQuery interface to MAST from this package as + it seems to be causing unwanted interactions with PyTest and Artifactory. + XXX + - Fixed an incompatibility in the ``minmed`` code for cosmic ray rejection with the ``numpy`` version ``>=1.25``. [#1573] diff --git a/drizzlepac/align.py b/drizzlepac/align.py index 1059bc1c0..583735fb9 100644 --- a/drizzlepac/align.py +++ b/drizzlepac/align.py @@ -46,7 +46,7 @@ def _init_logger(): def check_and_get_data(input_list: list, **pars: object) -> list: - """Verify that all specified files are present. If not, retrieve them from MAST. + """Verify that all specified files are present. If not, warn the user. This function relies on the `AstroQuery interface to MAST `_ From 4cb5b18187db0e3903655d77510e0607c379e18e Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Wed, 5 Jul 2023 11:58:56 -0400 Subject: [PATCH 07/15] Removed code in align.py which wanted to download images from the archive, and remove the "skip" from the remaining PyTests which "know" the data that needs to be processed. This data had to be loaded onto Artifactory to support the tests. --- drizzlepac/align.py | 40 +++++++++++++++++++-------------------- tests/hap/test_align.py | 2 +- tests/hap/test_apriori.py | 6 ++---- 3 files changed, 22 insertions(+), 26 deletions(-) diff --git a/drizzlepac/align.py b/drizzlepac/align.py index 583735fb9..176f4df30 100644 --- a/drizzlepac/align.py +++ b/drizzlepac/align.py @@ -48,16 +48,13 @@ def _init_logger(): def check_and_get_data(input_list: list, **pars: object) -> list: """Verify that all specified files are present. If not, warn the user. - This function relies on the `AstroQuery interface to MAST + This function formerly relied on the `AstroQuery interface to MAST `_ - to retrieve the exposures from the ``input_list`` that are not found in the current directory. This - function calls the simplified interface in - :func:`haputils/astroquery_utils/retrieve_observation` - to get the files through AstroQuery. - - ### Need to eliminate the use of astroquery. As such, the files to be processed MUST be - ### available on disk for processing. The user is responsible for making the data available. - ### Add code to indicate files need to be on disk. MDD + to retrieve the exposures from the ``input_list`` that are not found in + the current directory. However, Astroquery was found to be interferring + in some manner with the PyTests and Artifactory, so Astroquery functionality + was removed from Drizzlepac. Files are now expected to be available on disk + for processing, or an error is generated. Parameters ---------- @@ -71,22 +68,15 @@ def check_and_get_data(input_list: list, **pars: object) -> list: Returns ======= total_input_list: list - list of full filenames - ipppssoot names are no longer allowed. 
Data must be on disk.
-
-    See Also
-    ========
-    haputils/astroquery_utils/retrieve_observation
+        list of full filenames
 
     """
     empty_list = []
-    retrieve_list = []  # Actual files retrieved via astroquery and resident on disk
     candidate_list = []  # File names gathered from *_asn.fits file
-    ipppssoot_list = []  # ipppssoot names used to avoid duplicate downloads
     total_input_list = []  # Output full filename list of data on disk
 
     # Loop over the input_list to determine if the item in the input_list is a full association file
-    # (*_asn.fits), a full individual image file (aka singleton, *_flt.fits), or a root name specification
-    # (association or singleton, ipppssoot).
+    # (*_asn.fits), or a full individual image file (aka singleton, *_flt.fits).
     for input_item in input_list:
         log.info('Input item: {}'.format(input_item))
         indx = input_item.find('_')
@@ -111,9 +101,14 @@ def check_and_get_data(input_list: list, **pars: object) -> list:
                 '"flc.fits", or "flt.fits".'.format(
                     suffix))
             return (empty_list)
-
-    # Only the retrieve_list files via astroquery have been put into the total_input_list thus far.
-    # Now check candidate_list to detect or acquire the requested files from MAST via astroquery.
+        else:
+            log.error(
+                'Inappropriate file specification. Looking for "asn.fits", '
+                '"flc.fits", or "flt.fits". Input files must be resident in '
+                'the working directory.')
+            return (empty_list)
+
+    # Now check that the candidate_list files are actually on disk.
     for file in candidate_list:
         # If the file is found on disk, add it to the total_input_list and continue
         if glob.glob(file):
@@ -285,6 +280,9 @@ def perform_align(input_list, catalog_list, num_sources, archive=False, clobber=
     zero_dt = starting_dt = datetime.datetime.now()
     log.info(str(starting_dt))
     imglist = check_and_get_data(input_list, archive=archive, clobber=clobber, product_type=product_type)
+    if not imglist:
+        log.error("Data not found on disk. Retrieve data and try again.")
+        return None
     log.info("SUCCESS")
 
     log.info(f"Processing: {imglist}")
diff --git a/tests/hap/test_align.py b/tests/hap/test_align.py
index edfe6c4eb..67fc284c5 100644
--- a/tests/hap/test_align.py
+++ b/tests/hap/test_align.py
@@ -242,7 +242,7 @@ def test_align_fail_single_visit(self):
 
         assert 0.0 < total_rms <= RMS_LIMIT
 
-    pytest.skip("Skipping all tests using astroquery.", allow_module_level=True)
+    pytest.skip("Skipping the test to validate the use of the astroquery functionality.", allow_module_level=True)
     def test_astroquery(self):
         """Verify that new astroquery interface will work"""
 
diff --git a/tests/hap/test_apriori.py b/tests/hap/test_apriori.py
index afdf7517a..0cd95cfa4 100644
--- a/tests/hap/test_apriori.py
+++ b/tests/hap/test_apriori.py
@@ -9,8 +9,6 @@
 from drizzlepac.haputils import testutils
 from ..resources import BaseACS, BaseWFC3
 
-pytest.skip("Skipping all tests using astroquery as an experiment", allow_module_level=True)
-
 
 def compare_apriori(dataset):
     """This test will perform fits between ALL a priori solutions and GAIA.
@@ -116,7 +114,7 @@ class TestAcsApriori(BaseACS): """ @pytest.mark.bigdata - @pytest.mark.parametrize('dataset', ['jb1601020', 'J9I408010']) + @pytest.mark.parametrize('dataset', ['jb1601020_asn.fits', 'J9I408010_asn.fits']) def test_apriori(self, dataset): compare_apriori(dataset) @@ -135,7 +133,7 @@ class TestWFC3Apriori(BaseWFC3): @pytest.mark.bigdata @pytest.mark.parametrize( - 'dataset', ['ic0g0l010', 'icnw34040'] + 'dataset', ['ic0g0l010_asn.fits', 'icnw34040_asn.fits'] ) def test_apriori(self, dataset): compare_apriori(dataset) From b741f2375be4b6b925d06a68ced620f0c554be8b Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Wed, 5 Jul 2023 14:45:08 -0400 Subject: [PATCH 08/15] Fixed a test missing the ci_watson helper functions --- tests/hap/test_apriori.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/hap/test_apriori.py b/tests/hap/test_apriori.py index 0cd95cfa4..60a26ebe2 100644 --- a/tests/hap/test_apriori.py +++ b/tests/hap/test_apriori.py @@ -7,6 +7,8 @@ import numpy as np from drizzlepac.haputils import testutils +from ci_watson.artifactory_helpers import get_bigdata +from ci_watson.hst_helpers import raw_from_asn from ..resources import BaseACS, BaseWFC3 @@ -26,6 +28,14 @@ def compare_apriori(dataset): in which case, that WCS is ignored (silently). """ + # Get the data from Artifactory + dataset = dataset.lower() + instrument = "acs" if dataset[0] == "j" else "wfc3" + get_bigdata('drizzlepac', 'dev', instrument, 'input', dataset) + files = raw_from_asn(dataset, suffix='_flt.fits') + for input_file in files: + get_bigdata('drizzlepac', 'dev', instrument, 'input', input_file) + # Perform alignment of all WCS solutions with GAIA results_dict = testutils.compare_wcs_alignment(dataset) limit = 0.001 @@ -114,11 +124,13 @@ class TestAcsApriori(BaseACS): """ @pytest.mark.bigdata - @pytest.mark.parametrize('dataset', ['jb1601020_asn.fits', 'J9I408010_asn.fits']) + #@pytest.mark.parametrize('dataset', ['jb1601020_asn.fits', 'j9i408010_asn.fits']) + @pytest.mark.parametrize('dataset', ['j9i408010_asn.fits']) def test_apriori(self, dataset): compare_apriori(dataset) +pytest.mark.skip("Already tested") class TestWFC3Apriori(BaseWFC3): """ Tests which validate whether mosaics can be aligned to an astrometric standard, evaluate the quality of the fit, and generate a new WCS. From 6bc60606a92feab97a9eb1847238d57660bcfd1d Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Wed, 5 Jul 2023 14:46:39 -0400 Subject: [PATCH 09/15] Oops. Removed temporary skip and commented out code from test_apriori.py --- tests/hap/test_apriori.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/hap/test_apriori.py b/tests/hap/test_apriori.py index 60a26ebe2..30caaccbf 100644 --- a/tests/hap/test_apriori.py +++ b/tests/hap/test_apriori.py @@ -124,13 +124,11 @@ class TestAcsApriori(BaseACS): """ @pytest.mark.bigdata - #@pytest.mark.parametrize('dataset', ['jb1601020_asn.fits', 'j9i408010_asn.fits']) - @pytest.mark.parametrize('dataset', ['j9i408010_asn.fits']) + @pytest.mark.parametrize('dataset', ['jb1601020_asn.fits', 'j9i408010_asn.fits']) def test_apriori(self, dataset): compare_apriori(dataset) -pytest.mark.skip("Already tested") class TestWFC3Apriori(BaseWFC3): """ Tests which validate whether mosaics can be aligned to an astrometric standard, evaluate the quality of the fit, and generate a new WCS. 
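A note on the data-staging pattern the preceding patches converge on: every converted test now copies its known inputs out of the Artifactory tree `drizzlepac/dev/<instrument>/input` with `ci_watson.artifactory_helpers.get_bigdata`, which resolves the path components against the TEST_BIGDATA location, copies the file into the current working directory, and returns the local path; `ci_watson.hst_helpers.raw_from_asn` expands an association table into member filenames first where needed, as in test_apriori.py above. A minimal sketch of the shared pattern, assuming TEST_BIGDATA is configured; the helper name `stage_inputs` is illustrative and does not appear in the patches:

import os
from ci_watson.artifactory_helpers import get_bigdata

def stage_inputs(filenames, instrument='acs'):
    # Copy each expected FLT/FLC input into the current directory and
    # return the bare filenames, mirroring gather_data_for_processing().
    return [os.path.basename(get_bigdata('drizzlepac', 'dev', instrument,
                                         'input', name))
            for name in filenames]

The same call shape serves both the SVM tests, whose filenames come from the poller file, and test_apriori.py, whose filenames come from the association table.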
From 95ebd56235898d9978b788da3bc22c7da2d48011 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Wed, 5 Jul 2023 16:38:57 -0400 Subject: [PATCH 10/15] Impose an upper limit on the Numpy version until issues are resolved. --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 176fa105e..be0350001 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ runmultihap = 'drizzlepac.runmultihap:main' requires = ["setuptools>=61", "setuptools_scm[toml]>=3.4", "wheel", - "numpy>=1.18", + "numpy>=1.18, <=1.24", "astropy>=5.0.4", "markupsafe<=2.0.1"] build-backend = "setuptools.build_meta" From bc9f5a21a844acc4e7e083ce223f7fe90d2c56b3 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Thu, 6 Jul 2023 08:42:23 -0400 Subject: [PATCH 11/15] Removed change to pyproject.toml which should be its own PR --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index be0350001..176fa105e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,7 +85,7 @@ runmultihap = 'drizzlepac.runmultihap:main' requires = ["setuptools>=61", "setuptools_scm[toml]>=3.4", "wheel", - "numpy>=1.18, <=1.24", + "numpy>=1.18", "astropy>=5.0.4", "markupsafe<=2.0.1"] build-backend = "setuptools.build_meta" From e3f38b21e263af93485071ede628de278687071a Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Fri, 7 Jul 2023 11:31:49 -0400 Subject: [PATCH 12/15] Re-enable the b3 dev test in JenkinsfileRT --- JenkinsfileRT | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/JenkinsfileRT b/JenkinsfileRT index 7e5d82bac..244c20ff2 100644 --- a/JenkinsfileRT +++ b/JenkinsfileRT @@ -55,4 +55,4 @@ bc4.conda_packages = ['python=3.11'] // Spawn a host (or workdir) for each combination and run in parallel. // Also apply the job configuration defined in `jobconfig` above. //utils.run([bc1, bc3, bc4, jobconfig]) -utils.run([bc1, jobconfig]) +utils.run([bc1, b3, jobconfig]) From 1b49746ceab44493dc470a947c6ab79a3a2b8904 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Fri, 7 Jul 2023 13:40:14 -0400 Subject: [PATCH 13/15] Test: Remove bc3 from building and force Pytests to be 'not bigdata' --- JenkinsfileRT | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/JenkinsfileRT b/JenkinsfileRT index 244c20ff2..4e6522841 100644 --- a/JenkinsfileRT +++ b/JenkinsfileRT @@ -25,7 +25,8 @@ bc1.conda_packages = ['python=3.9'] bc1.build_cmds = ["pip install numpy astropy codecov pytest-cov ci-watson==0.5", "pip install --upgrade -e '.[test]'", "pip freeze"] -bc1.test_cmds = ["pytest --cov=./ --basetemp=tests_output --bigdata", +//bc1.test_cmds = ["pytest --cov=./ --basetemp=tests_output --bigdata", +bc1.test_cmds = ["pytest --basetemp=/tests_output -s -v -m "not bigdata", "codecov"] bc1.test_configs = [data_config] @@ -55,4 +56,4 @@ bc4.conda_packages = ['python=3.11'] // Spawn a host (or workdir) for each combination and run in parallel. // Also apply the job configuration defined in `jobconfig` above. 
//utils.run([bc1, bc3, bc4, jobconfig]) -utils.run([bc1, b3, jobconfig]) +utils.run([bc1, jobconfig]) From dc7689776a487cab7126866a497b75db3f477684 Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Fri, 7 Jul 2023 16:02:57 -0400 Subject: [PATCH 14/15] Removed the 'not bigdata' option for the bc1 in JenkinsfileRT --- JenkinsfileRT | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/JenkinsfileRT b/JenkinsfileRT index 4e6522841..7e5d82bac 100644 --- a/JenkinsfileRT +++ b/JenkinsfileRT @@ -25,8 +25,7 @@ bc1.conda_packages = ['python=3.9'] bc1.build_cmds = ["pip install numpy astropy codecov pytest-cov ci-watson==0.5", "pip install --upgrade -e '.[test]'", "pip freeze"] -//bc1.test_cmds = ["pytest --cov=./ --basetemp=tests_output --bigdata", -bc1.test_cmds = ["pytest --basetemp=/tests_output -s -v -m "not bigdata", +bc1.test_cmds = ["pytest --cov=./ --basetemp=tests_output --bigdata", "codecov"] bc1.test_configs = [data_config] From c0f726f61cfafb54ff6c2e20e8b0a1f1e64d3eaf Mon Sep 17 00:00:00 2001 From: Michele De La Pena Date: Mon, 10 Jul 2023 17:06:19 -0400 Subject: [PATCH 15/15] Comment out files for debug, remove deprecated code, add --slow to comments --- tests/hap/test_align.py | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/tests/hap/test_align.py b/tests/hap/test_align.py index 67fc284c5..eaf52f7ac 100644 --- a/tests/hap/test_align.py +++ b/tests/hap/test_align.py @@ -51,7 +51,7 @@ class TestAlignMosaic(BaseHLATest): is stored. The full path is TEST_BIGDATA plus the path components provided in the get_bigdata() invocation. This test file can be executed in the following manner: - $ pytest -s --bigdata test_align.py >& test_align_output.txt & + $ pytest -s --bigdata --slow test_align.py >& test_align_output.txt & $ tail -f test_align_output.txt """ @@ -126,6 +126,8 @@ def test_align_47tuc(self): assert 0.0 < total_rms <= RMS_LIMIT + """ + Test for j8ura1j* needs to be debugged. @pytest.mark.parametrize("input_filenames", [['j8ura1j1q_flt.fits', 'j8ura1j2q_flt.fits', 'j8ura1j4q_flt.fits', 'j8ura1j6q_flt.fits', 'j8ura1j7q_flt.fits', 'j8ura1j8q_flt.fits', @@ -148,6 +150,21 @@ def test_align_47tuc(self): 'ibnh02c7q_flc.fits', 'ibnh02c5q_flc.fits', 'ibnh02cpq_flc.fits', 'ibnh02c9q_flc.fits', 'ibnh02bfq_flc.fits', 'ibnh02beq_flc.fits']]) + """ + @pytest.mark.parametrize("input_filenames", [ ['j92c01b4q_flc.fits', 'j92c01b5q_flc.fits', + 'j92c01b7q_flc.fits', 'j92c01b9q_flc.fits'], + ['jbqf02gzq_flc.fits', 'jbqf02h5q_flc.fits', + 'jbqf02h7q_flc.fits', 'jbqf02hdq_flc.fits', + 'jbqf02hjq_flc.fits', 'jbqf02hoq_flc.fits', + 'jbqf02hqq_flc.fits', 'jbqf02hxq_flc.fits', + 'jbqf02i3q_flc.fits', 'jbqf02i8q_flc.fits', + 'jbqf02iaq_flc.fits'], + ['ib2u12kaq_flt.fits', 'ib2u12keq_flt.fits', + 'ib2u12kiq_flt.fits', 'ib2u12klq_flt.fits'], + ['ibnh02coq_flc.fits', 'ibnh02cmq_flc.fits', + 'ibnh02c7q_flc.fits', 'ibnh02c5q_flc.fits', + 'ibnh02cpq_flc.fits', 'ibnh02c9q_flc.fits', + 'ibnh02bfq_flc.fits', 'ibnh02beq_flc.fits']]) @pytest.mark.slow def test_align_single_visits(self, input_filenames): """ Verify whether single-visit exposures can be aligned to an astrometric standard. 
@@ -242,24 +259,3 @@ def test_align_fail_single_visit(self): assert 0.0 < total_rms <= RMS_LIMIT - pytest.skip("Skipping the test to validate the use of the astroquery functionality.", allow_module_level=True) - def test_astroquery(self): - """Verify that new astroquery interface will work""" - - total_rms = 0.01 - - dataset_table = alignimages.perform_align(['IB6V06060'], - catalog_list=['GAIADR2', 'GAIADR1'], - num_sources=250, - archive=False, clobber=True, - debug=False, update_hdr_wcs=False, - print_fit_parameters=True, print_git_info=False, - product_type='pipeline', - output=False) - - # Examine the output table to extract the RMS for the entire fit and the compromised - # information - if dataset_table: - total_rms = dataset_table.filtered_table['total_rms'][0] - - assert 0.0 < total_rms <= RMS_LIMIT
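A closing note on the skip idiom these patches adjust repeatedly: pytest.skip(..., allow_module_level=True) is only meaningful at module scope, where it aborts collection of the whole file. The stray call deleted above sat inside the test class, so it still ran at import time and skipped the entire module rather than just test_astroquery; likewise, the bare pytest.mark.skip("Already tested") expression briefly introduced in PATCH 08 was a no-op because it lacked the @ decorator syntax. A small illustrative sketch of the two correct placements (the module and test names are hypothetical):

import pytest

# Module level: aborts collection of this entire file, the placement the
# SVM tests use for their blanket skips.
pytest.skip("example: input data not staged on disk", allow_module_level=True)


class TestExample:
    # Per-test skip: use the decorator form, which is evaluated per test.
    @pytest.mark.skip(reason="example: skip only this test")
    def test_one(self):
        assert True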