Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

networks/jwst-spec-v1-and-image-v2 #58

Merged
merged 12 commits into from
Apr 6, 2024
8 changes: 8 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
new features
------------

- `builder.trained_networks` jwst_cal.zip includes updated (v2) `img3_reg` and new `spec3_reg` predictive models for image and spectroscopic data [#58]

- `preprocessor.ingest.JwstCalIngest` class and cmdline script for automated training data ingest [#57]

- `extractor.radio.JwstCalRadio` subclass for scraping datasets from MAST using ASN metadata [#51]
Expand All @@ -13,6 +15,11 @@ new features
enhancements
------------

- `skopes.jwst.cal.predict` generates predictions for spectroscopic datasets in addition to image data. This update also allows further customization of user arguments: [#58]
- `obs` to specify selection of a program ID + observation number
- `input_path` accepts either a directory (default) or a filename. If filename, the script will try to find any input exposures that belong to the same program and observation number as that file.
- `sfx` attribute is now customizable on instantiation of the class object (default is '_uncal.fits')

- `architect.builder.Builder.save_model` uses preferred keras archive format by default [#50]

- `preprocessor.transform.SkyTransformer` set offsets to 0 for gs/targ fiducial NaN values; custom filename for tx_file [#54]
Expand All @@ -31,6 +38,7 @@ bug fixes
- `preprocessor.encode.PairEncoder.handle_unknowns` create single new encoding value per unidentified variable [#53]



1.0.1 (2024-04-03)
==================

Expand Down
101 changes: 80 additions & 21 deletions conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from spacekit.analyzer.scan import HstSvmScanner, HstCalScanner, import_dataset
from spacekit.extractor.load import load_datasets, extract_file
from spacekit.skopes.jwst.cal.config import KEYPAIR_DATA

from spacekit.preprocessor.scrub import JwstCalScrubber

TESTED_VERSIONS = {}

Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(self, env):
self.kwargs = {
"svm": dict(index_col="index"),
"hstcal": dict(index_col="ipst"),
"jwstcal": dict(index_col="img_name")
"jwstcal": dict(index_col="Dataset")
}[env]

self.decoder = {
Expand Down Expand Up @@ -97,7 +97,7 @@ def __init__(self, env):
self.rename_cols = {
"svm": "_scl",
"hstcal": ["x_files", "x_size"],
"jwstcal": "_scl"
"jwstcal": None
}[env]

self.enc_cols = {
Expand Down Expand Up @@ -125,7 +125,7 @@ def __init__(self, env):
self.tx_file = {
"svm": "tests/data/svm/tx_data.json",
"hstcal": "tests/data/hstcal/tx_data.json",
"jwstcal": "tests/data/jwstcal/tx_data.json"
"jwstcal": "tests/data/jwstcal/tx_data-{}.json"
}[env]

self.visits = {
Expand All @@ -139,25 +139,8 @@ def __init__(self, env):


def pytest_addoption(parser):
    """Register the ``--env`` command line option on the pytest parser.

    The option stores its value and defaults to ``None`` when not supplied.
    """
    parser.addoption(
        "--env",
        action="store",
        default=None,
        help="Environment to run tests against",
    )

# def pytest_configure(config):
# config.addinivalue_line("markers", "skope_svm: only run in svm skope")
# config.addinivalue_line("markers", "skope_cal: only run in cal skope")

# def pytest_collection_modifyitems(config, items, skope):
# if skope.env == "hstcal"
# env_param = config.getoption("--env")
# if env_param:
# skope_param = pytest.mark.parametrize("skope", [(env_param)], indirect=True)
# else:
# skope_param = pytest.mark.parametrize("skope", [("hstcal", "svm")], indirect=True)
# # skip_param = pytest.mark.skipif(reason="skip params based on --env")
# for item in items:
# if "skopes" in item.keywords:
# item.add_marker(skope_param)


@fixture(scope="session")
def env(request):
Expand Down Expand Up @@ -348,3 +331,79 @@ def hst_cal_predict_visits():
@fixture(scope="function")
def jwstcal_input_path():
return "tests/data/jwstcal/predict/inputs"


@fixture(scope="module")
def jwstcal_scrub_filepath():
    """Return the path template for the JWST cal scrubber input CSVs.

    The single ``{}`` placeholder is filled in by the consuming fixtures
    (e.g. with 'img', 'wfsc', 'spec' or 'tac').
    """
    template = "tests/data/jwstcal/scrub/{}-inputs.csv"
    return template


@fixture(scope="module")
def jwst_cal_img_data(jwstcal_scrub_filepath):
    """Load the 'img' scrubber inputs CSV with zero-padded ID columns.

    The CSV is indexed by its "Dataset" column. PROGRAM values are formatted
    as strings left-padded with zeros to width 5, OBSERVTN to width 3.
    """
    csv_path = jwstcal_scrub_filepath.format('img')
    frame = pd.read_csv(csv_path, index_col="Dataset")
    # (column, format template) pairs applied element-wise, in this order
    for column, template in (('PROGRAM', '{:0>5}'), ('OBSERVTN', '{:0>3}')):
        frame[column] = frame[column].apply(template.format)
    return frame


@fixture(scope="function")
def jwst_cal_wfsc_data(jwstcal_scrub_filepath):
    """Load the 'wfsc' scrubber inputs CSV with zero-padded ID columns.

    The CSV is indexed by its "Dataset" column. PROGRAM values are formatted
    as strings left-padded with zeros to width 5, OBSERVTN to width 3.
    """
    csv_path = jwstcal_scrub_filepath.format('wfsc')
    frame = pd.read_csv(csv_path, index_col="Dataset")
    # (column, format template) pairs applied element-wise, in this order
    for column, template in (('PROGRAM', '{:0>5}'), ('OBSERVTN', '{:0>3}')):
        frame[column] = frame[column].apply(template.format)
    return frame


@fixture(scope="module")
def jwst_cal_spec_data(jwstcal_scrub_filepath):
    """Load the 'spec' scrubber inputs CSV with zero-padded ID columns.

    The CSV is indexed by its "Dataset" column. PROGRAM values are formatted
    as strings left-padded with zeros to width 5, OBSERVTN to width 3.
    """
    csv_path = jwstcal_scrub_filepath.format('spec')
    frame = pd.read_csv(csv_path, index_col="Dataset")
    # (column, format template) pairs applied element-wise, in this order
    for column, template in (('PROGRAM', '{:0>5}'), ('OBSERVTN', '{:0>3}')):
        frame[column] = frame[column].apply(template.format)
    return frame


@fixture(scope="module")
def jwst_cal_tac_data(jwstcal_scrub_filepath):
    """Load the 'tac' scrubber inputs CSV with zero-padded ID columns.

    The CSV is indexed by its "Dataset" column. PROGRAM values are formatted
    as strings left-padded with zeros to width 5, OBSERVTN to width 3.
    """
    csv_path = jwstcal_scrub_filepath.format('tac')
    frame = pd.read_csv(csv_path, index_col="Dataset")
    # (column, format template) pairs applied element-wise, in this order
    for column, template in (('PROGRAM', '{:0>5}'), ('OBSERVTN', '{:0>3}')):
        frame[column] = frame[column].apply(template.format)
    return frame


@fixture(scope="module")
def jwst_cal_img_df(jwst_cal_img_data):
    """Build a dataframe from the scrubber's ``imgpix`` mapping.

    A ``JwstCalScrubber`` is created in 'df' mode from the img input data;
    its ``imgpix`` dict becomes the rows of the frame (keys as the index).
    A 'name' column is added holding each index label with its second
    underscore-separated token removed.
    """
    scrubbed = JwstCalScrubber(
        "tmp",
        data=jwst_cal_img_data,
        mode='df',
        encoding_pairs=KEYPAIR_DATA,
    )
    frame = pd.DataFrame.from_dict(scrubbed.imgpix, orient='index')
    names = []
    for label in frame.index:
        tokens = label.split('_')
        # keep the first token, skip the second, keep everything after
        names.append('_'.join(tokens[:1] + tokens[2:]))
    frame['name'] = names
    return frame


@fixture(scope="module")
def jwst_cal_spec_df(jwst_cal_spec_data):
    """Build a dataframe from the scrubber's ``specpix`` mapping.

    A ``JwstCalScrubber`` is created in 'df' mode from the spec input data;
    its ``specpix`` dict becomes the rows of the frame (keys as the index).
    A 'name' column is added holding each index label with its second
    underscore-separated token removed.
    """
    scrubbed = JwstCalScrubber(
        "tmp",
        data=jwst_cal_spec_data,
        mode='df',
        encoding_pairs=KEYPAIR_DATA,
    )
    frame = pd.DataFrame.from_dict(scrubbed.specpix, orient='index')
    names = []
    for label in frame.index:
        tokens = label.split('_')
        # keep the first token, skip the second, keep everything after
        names.append('_'.join(tokens[:1] + tokens[2:]))
    frame['name'] = names
    return frame


@fixture(scope="module")
def jwst_cal_tac_df(jwst_cal_tac_data):
    """Build a dataframe from the scrubber's ``tacpix`` mapping.

    A ``JwstCalScrubber`` is created in 'df' mode from the tac input data;
    its ``tacpix`` dict becomes the rows of the frame (keys as the index).
    A 'name' column is added holding each index label with its second
    underscore-separated token removed.
    """
    scrubbed = JwstCalScrubber(
        "tmp",
        data=jwst_cal_tac_data,
        mode='df',
        encoding_pairs=KEYPAIR_DATA,
    )
    frame = pd.DataFrame.from_dict(scrubbed.tacpix, orient='index')
    names = []
    for label in frame.index:
        tokens = label.split('_')
        # keep the first token, skip the second, keep everything after
        names.append('_'.join(tokens[:1] + tokens[2:]))
    frame['name'] = names
    return frame
Binary file added previews/spec3_reg.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ install_requires =
boto3
numpy>=1.19
pandas
scikit-learn
scikit-learn>=1.3.2

[options.extras_require]
test =
Expand Down
5 changes: 3 additions & 2 deletions spacekit/builder/architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,14 +340,15 @@ def set_callbacks(self, patience=15):

def save_keras_model(self, model_path):
    """Save ``self.model`` as a ``.keras`` archive and record its path.

    Parameters
    ----------
    model_path : str
        Destination path for the model. If the final path component does
        not already end with the ``.keras`` extension, it is appended.

    Side effects
    ------------
    Creates the parent directory of ``model_path`` if it is missing, calls
    ``self.model.save`` with the final path, and stores that path on
    ``self.model_path``.
    """
    dpath = os.path.dirname(model_path)
    # A bare filename has no directory component; os.makedirs("") would
    # raise FileNotFoundError, so only create a directory when one is given.
    if dpath:
        os.makedirs(dpath, exist_ok=True)
    name = os.path.basename(model_path)
    # Check for the real extension (with the dot) so that names which merely
    # end in the letters "keras" (e.g. "mykeras") still get the suffix.
    if not name.endswith(".keras"):
        name += ".keras"
    keras_model_path = os.path.join(dpath, name)
    self.model.save(keras_model_path)
    self.model_path = keras_model_path

def save_model(self, weights=True, output_path=".", keras_archive=True):
def save_model(self, weights=True, output_path=".", keras_archive=True, parent_dir=""):
"""The model architecture, and training configuration (including the optimizer, losses, and metrics)
are stored in saved_model.pb. The weights are saved in the variables/ directory.

Expand All @@ -367,7 +368,7 @@ def save_model(self, weights=True, output_path=".", keras_archive=True):
else:
model_name = self.name

model_path = os.path.join(output_path, "models", model_name)
model_path = os.path.join(output_path, "models", parent_dir, model_name)

if keras_archive is True:
self.save_keras_model(model_path)
Expand Down
29 changes: 29 additions & 0 deletions spacekit/builder/blueprints.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ def build_params(self):
"hst_mem_reg": self.hst_mem_reg,
"hst_wall_reg": self.hst_wall_reg,
"jwst_img3_reg": self.jwst_img3_reg,
"jwst_spec3_reg": self.jwst_spec3_reg,
}[self.architecture]

def fit_params(self):
Expand All @@ -26,6 +27,7 @@ def fit_params(self):
"hst_mem_reg": self.draft_hst_mem_reg,
"hst_wall_reg": self.draft_hst_wall_reg,
"jwst_img3_reg": self.draft_jwst_img3_reg,
"jwst_spec3_reg": self.draft_jwst_spec3_reg,
}[self.architecture]

def svm_mlp(self):
Expand Down Expand Up @@ -180,3 +182,30 @@ def draft_jwst_img3_reg(self):
early_stopping=None,
verbose=0,
)

def jwst_spec3_reg(self):
    """Return the build parameters for the ``spec3_reg`` network.

    The mapping describes an 18-input, single-output model whose hidden
    layer widths grow from 18 to 288 and shrink back to 18, using relu
    activations, a linear cost function, the Adam optimizer, mse loss and
    an RMSE metric named "rmse".
    """
    hidden_layers = [18, 36, 72, 144, 288, 144, 72, 36, 18]
    params = {
        "input_shape": 18,
        "output_shape": 1,
        "layers": hidden_layers,
        "activation": "relu",
        "cost_function": "linear",
        "lr_sched": True,
        "optimizer": Adam,
        "loss": "mse",
        "metrics": [RMSE(name="rmse")],
        "input_name": "jwst_cal_spec3",
        "output_name": "spec3_regressor",
        "name": "spec3_reg",
        "algorithm": "linreg",
    }
    return params

def draft_jwst_spec3_reg(self):
    """Return the fit parameters for the ``spec3_reg`` network.

    The mapping holds batch size, epoch count, learning rate, the two-item
    ``decay`` setting, and the early-stopping and verbosity options.
    """
    fit_settings = {
        "batch_size": 32,
        "epochs": 2000,
        "lr": 1e-4,
        "decay": [100000, 0.96],
        "early_stopping": None,
        "verbose": 0,
    }
    return fit_settings
Binary file modified spacekit/builder/trained_networks/hst_cal.zip
Binary file not shown.
Binary file modified spacekit/builder/trained_networks/jwst_cal.zip
Binary file not shown.
Binary file modified spacekit/builder/trained_networks/svm_align.zip
Binary file not shown.
32 changes: 16 additions & 16 deletions spacekit/datasets/meta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
calcloud = {
"uri": "https://zenodo.org/record/8231215/files",
"uri": "https://zenodo.org/record/10895592/files",
"data": {
"2022-02-14": {
"fname": "hst_cal_std_2022-02-14.zip?download=1",
Expand Down Expand Up @@ -32,15 +32,15 @@
},
"model": {
"fname": "hst_cal.zip",
"hash": "370f9950c6f0f0412af039617eebea93",
"hash": "ffe7d00504ad5d6ebc79488abe612474",
"desc": "hst calcloud resource prediction models",
"key": "hst_cal",
"size": "2.1MB",
"size": "385.42kB",
},
}

svm = {
"uri": "https://zenodo.org/record/8231215/files",
"uri": "https://zenodo.org/record/10895592/files",
"data": {
"2022-02-14": {
"fname": "hst_drz_svm_2022-02-14.zip?download=1",
Expand Down Expand Up @@ -80,16 +80,16 @@
},
"model": {
"fname": "svm_align.zip",
"hash": "01468fae74ceb6b31fd073ed3b9b599f",
"hash": "af3b019cc3079cb29fd01bae58e9560d",
"desc": "hst svm alignment prediction models",
"key": "svm_align",
"size": "17.9MB",
"size": "4.92MB",
},
}


jwst_cal = {
"uri": "https://zenodo.org/record/8231215/files",
"uri": "https://zenodo.org/record/10895592/files",
"data": {
"2023-08-02": {
"fname": "",
Expand All @@ -101,16 +101,16 @@
},
"model": {
"fname": "jwst_cal.zip",
"hash": "92a32f33468807793b51a0b5e761dcfb",
"hash": "f3b6dbcc3fc92a6f806f5b5464e68c02",
"desc": "JWST Calibration Processing Resource Prediction Models",
"key": "jwst_cal",
"size": "110kB",
"size": "1.5MB",
},
}


k2 = {
"uri": "https://zenodo.org/record/8231215/files",
"uri": "https://zenodo.org/record/10895592/files",
"data": {
"test": {
"fname": "k2-exo-flux-ts-test.csv.zip?download=1",
Expand Down Expand Up @@ -142,19 +142,19 @@
"calcloud": {
"basepath": "spacekit.builder.trained_networks",
"fname": "hst_cal.zip",
"hash": "84abd317355c73667e5c08ada6868f0ca9563bd87717abe0680200d05457937b",
"size": "2.1MB",
"hash": "f3422c98d93520898e10d068a1e434dd5caf40e632760687df377c1b2bccff57",
"size": "385.42kB",
},
"svm": {
"basepath": "spacekit.builder.trained_networks",
"fname": "svm_align.zip",
"hash": "09550dc36499422453079ba7d1536cff2154373d7f0f6d0126e9002ce9ce3ed9",
"size": "17.9MB",
"hash": "577a87fab157fdadc657af0dcd67b55af2d89d855d5597bd96671b20ff135636",
"size": "4.92MB",
},
"jwst_cal": {
"basepath": "spacekit.builder.trained_networks",
"fname": "jwst_cal.zip",
"hash": "e9880f2e33fe9ab6d5aee066ab9e957abef1154fddc1297dcbaee495367ac222",
"size": "110kB",
"hash": "b61d0281fd149e2b0ffcded81ba54b24b479c5b7191072be2bfb59464d779187",
"size": "1.5MB",
},
}
Loading
Loading