spacetelescope · alphasentaurii · Apr 6, 2024 · Apr 5, 2024 · Apr 5, 2024 · Apr 5, 2024
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -4,6 +4,8 @@
 new features
 ------------
 
+- `builder.trained_networks` jwst_cal.zip includes updated (v2) `img3_reg` and new `spec3_reg` predictive models for image and spectroscopic data [#58]
+
 - `preprocessor.ingest.JwstCalIngest` class and cmdline script for automated training data ingest [#57]
 
 - `extractor.radio.JwstCalRadio` subclass for scraping datasets from MAST using ASN metadata [#51]
@@ -13,6 +15,11 @@ new features
 enhancements
 ------------
 
+- `skopes.jwst.cal.predict` generates predictions for spectroscopic datasets in addition to image data. This update also allows further customization of user arguments: [#58]
+    - `obs` to specify selection of a program ID + observation number
+    - `input_path` accepts either a directory (default) or a filename. If a filename is given, the script will try to find any input exposures that belong to the same program and observation number as that file.
+    - `sfx` attribute is now customizable on instantiation of the class object (default is '_uncal.fits')
+
 - `architect.builder.Builder.save_model` uses preferred keras archive format by default [#50]
 
 - `preprocessor.transform.SkyTransformer` set offsets to 0 for gs/targ fiducial NaN values; custom filename for tx_file [#54]
@@ -31,6 +38,7 @@ bug fixes
 - `preprocessor.encode.PairEncoder.handle_unknowns` create single new encoding value per unidentified variable [#53]
 
 
+
 1.0.1 (2024-04-03)
 ==================
 

diff --git a/conftest.py b/conftest.py
@@ -7,7 +7,7 @@
 from spacekit.analyzer.scan import HstSvmScanner, HstCalScanner, import_dataset
 from spacekit.extractor.load import load_datasets, extract_file
 from spacekit.skopes.jwst.cal.config import KEYPAIR_DATA
-
+from spacekit.preprocessor.scrub import JwstCalScrubber
 
 TESTED_VERSIONS = {}
 
@@ -52,7 +52,7 @@ def __init__(self, env):
         self.kwargs = {
             "svm": dict(index_col="index"), 
             "hstcal": dict(index_col="ipst"),
-            "jwstcal": dict(index_col="img_name")
+            "jwstcal": dict(index_col="Dataset")
         }[env]
 
         self.decoder = {
@@ -97,7 +97,7 @@ def __init__(self, env):
         self.rename_cols = {
             "svm": "_scl",
             "hstcal": ["x_files", "x_size"],
-            "jwstcal": "_scl"
+            "jwstcal": None
         }[env]
 
         self.enc_cols = {
@@ -125,7 +125,7 @@ def __init__(self, env):
         self.tx_file = {
             "svm": "tests/data/svm/tx_data.json",
             "hstcal": "tests/data/hstcal/tx_data.json",
-            "jwstcal": "tests/data/jwstcal/tx_data.json"
+            "jwstcal": "tests/data/jwstcal/tx_data-{}.json"
         }[env]
 
         self.visits = {
@@ -139,25 +139,8 @@ def __init__(self, env):
 
 
 def pytest_addoption(parser):
-    # parser.addoption("--env", action="store", default="hstcal", help="Environment to run tests against")
     parser.addoption("--env", action="store", default=None, help="Environment to run tests against")
 
-# def pytest_configure(config):
-#     config.addinivalue_line("markers", "skope_svm: only run in svm skope")
-#     config.addinivalue_line("markers", "skope_cal: only run in cal skope")
-
-# def pytest_collection_modifyitems(config, items, skope):
-#     if skope.env == "hstcal"
-    # env_param = config.getoption("--env")
-    # if env_param:
-    #     skope_param = pytest.mark.parametrize("skope", [(env_param)], indirect=True)
-    # else:
-    #     skope_param = pytest.mark.parametrize("skope", [("hstcal", "svm")], indirect=True)
-    #     # skip_param = pytest.mark.skipif(reason="skip params based on --env")
-    # for item in items:
-    #     if "skopes" in item.keywords:
-    #         item.add_marker(skope_param)
-
 
 @fixture(scope="session")
 def env(request):
@@ -348,3 +331,79 @@ def hst_cal_predict_visits():
 @fixture(scope="function")
 def jwstcal_input_path():
     return "tests/data/jwstcal/predict/inputs"
+
+
+@fixture(scope="module")
+def jwstcal_scrub_filepath():
+    return "tests/data/jwstcal/scrub/{}-inputs.csv"
+
+
+@fixture(scope="module")
+def jwst_cal_img_data(jwstcal_scrub_filepath):
+    data = pd.read_csv(jwstcal_scrub_filepath.format('img'), index_col="Dataset")
+    data['PROGRAM'] = data['PROGRAM'].apply(lambda x: '{:0>5}'.format(x))
+    data['OBSERVTN'] = data['OBSERVTN'].apply(lambda x: '{:0>3}'.format(x))
+    return data
+
+
+@fixture(scope="function")
+def jwst_cal_wfsc_data(jwstcal_scrub_filepath):
+    data = pd.read_csv(jwstcal_scrub_filepath.format('wfsc'), index_col="Dataset")
+    data['PROGRAM'] = data['PROGRAM'].apply(lambda x: '{:0>5}'.format(x))
+    data['OBSERVTN'] = data['OBSERVTN'].apply(lambda x: '{:0>3}'.format(x))
+    return data
+
+
+@fixture(scope="module")
+def jwst_cal_spec_data(jwstcal_scrub_filepath):
+    data = pd.read_csv(jwstcal_scrub_filepath.format('spec'), index_col="Dataset")
+    data['PROGRAM'] = data['PROGRAM'].apply(lambda x: '{:0>5}'.format(x))
+    data['OBSERVTN'] = data['OBSERVTN'].apply(lambda x: '{:0>3}'.format(x))
+    return data
+
+
+@fixture(scope="module")
+def jwst_cal_tac_data(jwstcal_scrub_filepath):
+    data = pd.read_csv(jwstcal_scrub_filepath.format('tac'), index_col="Dataset")
+    data['PROGRAM'] = data['PROGRAM'].apply(lambda x: '{:0>5}'.format(x))
+    data['OBSERVTN'] = data['OBSERVTN'].apply(lambda x: '{:0>3}'.format(x))
+    return data
+
+
+@fixture(scope="module")
+def jwst_cal_img_df(jwst_cal_img_data):
+    scrubber = JwstCalScrubber(
+        "tmp",
+        data=jwst_cal_img_data, 
+        mode='df',
+        encoding_pairs=KEYPAIR_DATA
+    )
+    df = pd.DataFrame.from_dict(scrubber.imgpix, orient='index')
+    df['name'] = ['_'.join([n.split('_')[0]] + n.split('_')[2:]) for n in list(df.index)]
+    return df
+
+
+@fixture(scope="module")
+def jwst_cal_spec_df(jwst_cal_spec_data):
+    scrubber = JwstCalScrubber(
+        "tmp",
+        data=jwst_cal_spec_data, 
+        mode='df',
+        encoding_pairs=KEYPAIR_DATA
+    )
+    df = pd.DataFrame.from_dict(scrubber.specpix, orient='index')
+    df['name'] = ['_'.join([n.split('_')[0]] + n.split('_')[2:]) for n in list(df.index)]
+    return df
+
+
+@fixture(scope="module")
+def jwst_cal_tac_df(jwst_cal_tac_data):
+    scrubber = JwstCalScrubber(
+        "tmp",
+        data=jwst_cal_tac_data, 
+        mode='df',
+        encoding_pairs=KEYPAIR_DATA
+    )
+    df = pd.DataFrame.from_dict(scrubber.tacpix, orient='index')
+    df['name'] = ['_'.join([n.split('_')[0]] + n.split('_')[2:]) for n in list(df.index)]
+    return df
diff --git a/previews/spec3_reg.png b/previews/spec3_reg.png
diff --git a/setup.cfg b/setup.cfg
@@ -38,7 +38,7 @@ install_requires =
     boto3
     numpy>=1.19
     pandas
-    scikit-learn
+    scikit-learn>=1.3.2
 
 [options.extras_require]
 test =

diff --git a/spacekit/builder/architect.py b/spacekit/builder/architect.py
@@ -340,14 +340,15 @@ def set_callbacks(self, patience=15):
 
     def save_keras_model(self, model_path):
         dpath = os.path.dirname(model_path)
+        os.makedirs(dpath, exist_ok=True)
         name = os.path.basename(model_path)
         if not name.endswith("keras"):
             name += ".keras"
         keras_model_path = os.path.join(dpath, name)
         self.model.save(keras_model_path)
         self.model_path = keras_model_path
 
-    def save_model(self, weights=True, output_path=".", keras_archive=True):
+    def save_model(self, weights=True, output_path=".", keras_archive=True, parent_dir=""):
         """The model architecture, and training configuration (including the optimizer, losses, and metrics)
         are stored in saved_model.pb. The weights are saved in the variables/ directory.
 
@@ -367,7 +368,7 @@ def save_model(self, weights=True, output_path=".", keras_archive=True):
         else:
             model_name = self.name
 
-        model_path = os.path.join(output_path, "models", model_name)
+        model_path = os.path.join(output_path, "models", parent_dir, model_name)
 
         if keras_archive is True:
             self.save_keras_model(model_path)

diff --git a/spacekit/builder/blueprints.py b/spacekit/builder/blueprints.py
@@ -16,6 +16,7 @@ def build_params(self):
             "hst_mem_reg": self.hst_mem_reg,
             "hst_wall_reg": self.hst_wall_reg,
             "jwst_img3_reg": self.jwst_img3_reg,
+            "jwst_spec3_reg": self.jwst_spec3_reg,
         }[self.architecture]
 
     def fit_params(self):
@@ -26,6 +27,7 @@ def fit_params(self):
             "hst_mem_reg": self.draft_hst_mem_reg,
             "hst_wall_reg": self.draft_hst_wall_reg,
             "jwst_img3_reg": self.draft_jwst_img3_reg,
+            "jwst_spec3_reg": self.draft_jwst_spec3_reg,
         }[self.architecture]
 
     def svm_mlp(self):
@@ -180,3 +182,30 @@ def draft_jwst_img3_reg(self):
             early_stopping=None,
             verbose=0,
         )
+
+    def jwst_spec3_reg(self):
+        return dict(
+            input_shape=18,
+            output_shape=1,
+            layers=[18, 36, 72, 144, 288, 144, 72, 36, 18],
+            activation="relu",
+            cost_function="linear",
+            lr_sched=True,
+            optimizer=Adam,
+            loss="mse",
+            metrics=[RMSE(name="rmse")],
+            input_name="jwst_cal_spec3",
+            output_name="spec3_regressor",
+            name="spec3_reg",
+            algorithm="linreg",
+        )
+
+    def draft_jwst_spec3_reg(self):
+        return dict(
+            batch_size=32,
+            epochs=2000,
+            lr=1e-4,
+            decay=[100000, 0.96],
+            early_stopping=None,
+            verbose=0,
+        )
diff --git a/spacekit/builder/trained_networks/hst_cal.zip b/spacekit/builder/trained_networks/hst_cal.zip
diff --git a/spacekit/builder/trained_networks/jwst_cal.zip b/spacekit/builder/trained_networks/jwst_cal.zip
diff --git a/spacekit/builder/trained_networks/svm_align.zip b/spacekit/builder/trained_networks/svm_align.zip
diff --git a/spacekit/datasets/meta.py b/spacekit/datasets/meta.py
@@ -1,5 +1,5 @@
 calcloud = {
-    "uri": "https://zenodo.org/record/8231215/files",
+    "uri": "https://zenodo.org/record/10895592/files",
     "data": {
         "2022-02-14": {
             "fname": "hst_cal_std_2022-02-14.zip?download=1",
@@ -32,15 +32,15 @@
     },
     "model": {
         "fname": "hst_cal.zip",
-        "hash": "370f9950c6f0f0412af039617eebea93",
+        "hash": "ffe7d00504ad5d6ebc79488abe612474",
         "desc": "hst calcloud resource prediction models",
         "key": "hst_cal",
-        "size": "2.1MB",
+        "size": "385.42kB",
     },
 }
 
 svm = {
-    "uri": "https://zenodo.org/record/8231215/files",
+    "uri": "https://zenodo.org/record/10895592/files",
     "data": {
         "2022-02-14": {
             "fname": "hst_drz_svm_2022-02-14.zip?download=1",
@@ -80,16 +80,16 @@
     },
     "model": {
         "fname": "svm_align.zip",
-        "hash": "01468fae74ceb6b31fd073ed3b9b599f",
+        "hash": "af3b019cc3079cb29fd01bae58e9560d",
         "desc": "hst svm alignment prediction models",
         "key": "svm_align",
-        "size": "17.9MB",
+        "size": "4.92MB",
     },
 }
 
 
 jwst_cal = {
-    "uri": "https://zenodo.org/record/8231215/files",
+    "uri": "https://zenodo.org/record/10895592/files",
     "data": {
         "2023-08-02": {
             "fname": "",
@@ -101,16 +101,16 @@
     },
     "model": {
         "fname": "jwst_cal.zip",
-        "hash": "92a32f33468807793b51a0b5e761dcfb",
+        "hash": "f3b6dbcc3fc92a6f806f5b5464e68c02",
         "desc": "JWST Calibration Processing Resource Prediction Models",
         "key": "jwst_cal",
-        "size": "110kB",
+        "size": "1.5MB",
     },
 }
 
 
 k2 = {
-    "uri": "https://zenodo.org/record/8231215/files",
+    "uri": "https://zenodo.org/record/10895592/files",
     "data": {
         "test": {
             "fname": "k2-exo-flux-ts-test.csv.zip?download=1",
@@ -142,19 +142,19 @@
     "calcloud": {
         "basepath": "spacekit.builder.trained_networks",
         "fname": "hst_cal.zip",
-        "hash": "84abd317355c73667e5c08ada6868f0ca9563bd87717abe0680200d05457937b",
-        "size": "2.1MB",
+        "hash": "f3422c98d93520898e10d068a1e434dd5caf40e632760687df377c1b2bccff57",
+        "size": "385.42kB",
     },
     "svm": {
         "basepath": "spacekit.builder.trained_networks",
         "fname": "svm_align.zip",
-        "hash": "09550dc36499422453079ba7d1536cff2154373d7f0f6d0126e9002ce9ce3ed9",
-        "size": "17.9MB",
+        "hash": "577a87fab157fdadc657af0dcd67b55af2d89d855d5597bd96671b20ff135636",
+        "size": "4.92MB",
     },
     "jwst_cal": {
         "basepath": "spacekit.builder.trained_networks",
         "fname": "jwst_cal.zip",
-        "hash": "e9880f2e33fe9ab6d5aee066ab9e957abef1154fddc1297dcbaee495367ac222",
-        "size": "110kB",
+        "hash": "b61d0281fd149e2b0ffcded81ba54b24b479c5b7191072be2bfb59464d779187",
+        "size": "1.5MB",
     },
 }