Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

misc/cnn2d-fixes #93

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ preprocessor
------------
- explicitly pass `encoding=bytes` in transform.hypersonic_pliers for numpy 2 compatibility where this will no longer be the default for np.loadtxt [#92]

builder
-------
- Various minor fixes relating to CNN 2d model usage [#93]


1.1.1 (2024-07-11)
==================
Expand Down
14 changes: 7 additions & 7 deletions spacekit/builder/architect.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,7 +898,7 @@ def ensemble_cnn(self):
self.cnn.output_name = "svm_image_output"
self.cnn.name = "svm_cnn"
self.cnn.ensemble = True
self.cnn.input_shape = self.X_train[1].shape[1:] if self.X_train else None
self.cnn.input_shape = self.X_train[1].shape[1:] if self.X_train is not None else None
self.cnn.output_shape = 1
self.cnn.layers = [18, 32, 64, 32, 18]
self.cnn.activation = "leaky_relu"
Expand Down Expand Up @@ -1016,7 +1016,7 @@ def __init__(
**builder_kwargs,
)
self.blueprint = blueprint
self.input_shape = self.X_train.shape[1:] if self.X_train else None
self.input_shape = self.X_train.shape[1:] if self.X_train is not None else None
self.output_shape = 1
self.input_name = "cnn2d_inputs"
self.output_name = "cnn2d_output"
Expand All @@ -1035,7 +1035,7 @@ def __init__(
self.early_stopping = None
self.batch_size = 32
self.cost_function = "sigmoid"
self.step_size = X_train.shape[1] if X_train else None
self.step_size = X_train.shape[1] if X_train is not None else None
self.steps_per_epoch = self.step_size // self.batch_size
self.batch_maker = self.batch

Expand All @@ -1054,17 +1054,17 @@ def build(self):
)(inputs)
x = MaxPool1D(strides=self.strides)(x)
x = BatchNormalization()(x)
count = 1
for f in self.filters[1:]:
for f in list(range(len(self.filters))):
if f == 0:
continue
x = Conv1D(
filters=self.filters[f],
kernel_size=self.kernel,
activation=self.activation,
)(x)
x = MaxPool1D(strides=self.strides)(x)
if count < len(self.filters):
if f < len(self.filters) - 1:
x = BatchNormalization()(x)
count += 1
else:
x = Flatten()(x)
self.log.info("DROPOUT")
Expand Down
8 changes: 4 additions & 4 deletions spacekit/extractor/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,10 +325,10 @@ def __init__(
self.fpaths = []

def scrape(self):
"""Using the key-pair values in `dataset` dictionary attribute, download the files from a github
repo and check the hash keys match before extracting. Extraction and hash-key checking is handled
externally by the `keras.utils.data_utils.get_file` method. If extraction is successful, the
archive file will be deleted.
"""Using the key-value pairs in the `dataset` dictionary attribute, download the files from a website
(such as Zenodo) and check that the hash keys match before extracting. Extraction and hash-key checking
are handled externally by the `keras.utils.data_utils.get_file` method. If extraction is successful,
the archive file will be deleted. See spacekit.datasets.meta for dictionary formatting examples.

Returns
-------
Expand Down
9 changes: 4 additions & 5 deletions spacekit/preprocessor/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -869,7 +869,7 @@ def tensors_to_arrays(X_train, y_train, X_test, y_test):


def hypersonic_pliers(
path_to_train, path_to_test, y_col=[0], skip=1, dlm=",", encoding=bytes, subtract_y=0.0
path_to_train, path_to_test, y_col=[0], skip=1, dlm=",", encoding='bytes', subtract_y=0.0, reshape=False
):
"""Extracts data into 1-dimensional arrays, using separate target classes (y) for training and test data. Assumes y (target)
is first column in dataframe. If the target (y) classes in the raw data are 0 and 2, but you'd like them to be binaries (0
Expand Down Expand Up @@ -900,16 +900,15 @@ def hypersonic_pliers(
Train = np.loadtxt(path_to_train, skiprows=skip, delimiter=dlm, encoding=encoding)
cols = list(range(Train.shape[1]))
xcols = [c for c in cols if c not in y_col]
# X_train = Train[:, 1:]
X_train = Train[:, xcols]
# y_train = Train[:, 0, np.newaxis] - subtract_y
y_train = Train[:, y_col, np.newaxis] - subtract_y

Test = np.loadtxt(path_to_test, skiprows=skip, delimiter=dlm, encoding=encoding)
X_test = Test[:, xcols]
y_test = Test[:, y_col, np.newaxis] - subtract_y
# X_test = Test[:, 1:]
# y_test = Test[:, 0, np.newaxis] - subtract_y
if reshape is True:
y_train = y_train.reshape(y_train.shape[0], 1)
y_test = y_test.reshape(y_test.shape[0], 1)

del Train, Test
print("X_train: ", X_train.shape)
Expand Down
46 changes: 27 additions & 19 deletions spacekit/skopes/kepler/light_curves.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,21 @@
babel_fish_dispenser,
)
from spacekit.builder.architect import BuilderCNN2D
from spacekit.datasets.k2_exo import k2_uri, k2_data
from spacekit.datasets.meta import k2 as k2meta
from spacekit.extractor.scrape import WebScraper

def downloads_exist(scraper, k2_meta):
    """Check whether every dataset file listed in ``k2_meta`` is already cached.

    Parameters
    ----------
    scraper : object
        Any object exposing ``cache_dir`` and ``cache_subdir`` attributes; the
        two are joined to form the directory that is searched for the files.
    k2_meta : dict
        Mapping whose values are dicts with a ``'key'`` entry holding the file
        name expected inside the cache directory.

    Returns
    -------
    list
        Full paths of all expected files (in ``k2_meta`` order) when every one
        of them exists on disk, otherwise an empty list.
    """
    base_path = os.path.join(scraper.cache_dir, scraper.cache_subdir)
    # Only the values are needed; the dictionary keys are never used.
    filepaths = [os.path.join(base_path, v['key']) for v in k2_meta.values()]
    # Empty metadata means nothing was found, so do not announce that the
    # download is being skipped when the caller will receive an empty list.
    if not filepaths or not all(os.path.exists(fp) for fp in filepaths):
        return []
    print("Found existing datasets, skipping download.")
    return filepaths


class LaunchK2:
def __init__(self, fpaths):
Expand All @@ -20,54 +32,50 @@ def __init__(self, fpaths):
self.history = None

def launch_prep(self):
self.X_train, self.X_test, self.y_train, self.y_test = self.split_data()
self.X_train, self.X_test = self.scale_data()
self.X_train, self.X_test = self.add_filter()
return self.X_train, self.X_test, self.y_train, self.y_test
self.split_data()
self.scale_data()
self.add_filter()

def split_data(self):
print("Splitting train-test feature and target data...")
for fpath in self.fpaths:
if fpath.endswith("Train"):
if "Train" in fpath:
train = fpath
else:
test = fpath
self.X_train, self.X_test, self.y_train, self.y_test = hypersonic_pliers(
train, test
train, test, subtract_y=1.0, reshape=True
)
print("Data split successful")
return self.X_train, self.X_test, self.y_train, self.y_test

def scale_data(self):
print("Scaling data to Zero Mean and Unit Variance...")
self.X_train, self.X_test = thermo_fusion_chisel(self.X_train, self.X_test)
print("Data scaling successful.")
return self.X_train, self.X_test

def add_filter(self):
print("Adding noise filter...")
self.X_train, self.X_test = babel_fish_dispenser(self.X_train, self.X_test)
print("Noise filter added successfully.")
return self.X_train, self.X_test

def deploy(self):
self.builder = BuilderCNN2D(
self.X_train, self.y_train, self.X_test, self.y_test
X_train=self.X_train, y_train=self.y_train, X_test=self.X_test, y_test=self.y_test
)
self.builder.build()
return self.builder

def takeoff(self):
self.history = self.builder.batch_fit()


if __name__ == "__main__":
home = os.getcwd()
data = os.path.join(home, "data")
print("Extracting data...")
fpaths = WebScraper(k2_uri, k2_data).scrape_repo()
print("Data extraction successful.")
k2 = LaunchK2(fpaths)
scraper = WebScraper(k2meta['uri'], k2meta['data'])
scraper.fpaths = downloads_exist(scraper, k2meta['data'])
if not scraper.fpaths:
scraper.scrape()
print("Data extraction successful.")
k2 = LaunchK2(scraper.fpaths)
k2.launch_prep()
k2.builder = k2.deploy()
k2.history = k2.takeoff()
k2.deploy()
k2.takeoff()
Loading