Skip to content

Commit

Permalink
Merge pull request #24 from arjunsavel/test_pca_opt
Browse files Browse the repository at this point in the history
actually test PCA optimization!
  • Loading branch information
arjunsavel authored Feb 10, 2024
2 parents 83c8f01 + 4b80047 commit 49445dd
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 37 deletions.
12 changes: 8 additions & 4 deletions src/cortecs/fit/fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, opac, method="pca", **fitter_kwargs):
# todo: figure out how to change the fitting...based on the fit axis?
return

def fit(self, parallel=False):
def fit(self, parallel=False, verbose=1):
"""
fits the opacity data to a neural network. loops over all wavelengths.
Can loop over a list of species? ...should be parallelized!
Expand All @@ -89,13 +89,13 @@ def fit(self, parallel=False):
prep_method = self.prep_method_dict[self.method]
self.prep_res = prep_method(self.opac.cross_section, **self.fitter_kwargs)
if not parallel:
self.fit_serial()
self.fit_serial(verbose=verbose)
else:
self.fit_parallel()

return # will I need to save and stuff like that?

def fit_serial(self):
def fit_serial(self, verbose=0):
"""
fits the opacity data with a given method. Serial.
:return:
Expand All @@ -105,7 +105,11 @@ def fit_serial(self):
# loop over the wavelengths and fit
res = []
with warnings.catch_warnings():
for i, w in tqdm(enumerate(self.wl), total=len(self.wl)):
if verbose == 1:
iterator = tqdm(enumerate(self.wl), total=len(self.wl))
else:
iterator = enumerate(self.wl)
for i, w in iterator:
cross_section = self.opac.cross_section[:, :, i]
res += [
self.fit_func(
Expand Down
10 changes: 1 addition & 9 deletions src/cortecs/opt/opt.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,5 @@ def optimize(self, max_size, max_evaluations, **kwargs):
optimizes the fit.
:return:
"""
self.best_params = self.opt_func(
max_size,
max_evaluations,
self.opac.cross_section,
self.P,
self.T,
self.wl,
**kwargs
)
self.best_params = self.opt_func(max_size, max_evaluations, self.opac, **kwargs)
return
8 changes: 4 additions & 4 deletions src/cortecs/opt/optimize_neural_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,7 @@
def optimize_neural_net(
max_size,
max_evaluations,
cross_section,
P,
T,
wl,
opac,
min_layers=2,
min_neurons=2,
max_layers=3,
Expand All @@ -33,6 +30,9 @@ def optimize_neural_net(
max_evaluations: int
maximum number of evaluations of the neural network.
"""
cross_section = opac.cross_section
T = opac.T
P = opac.P
if max_evaluations < 1:
raise ValueError("max_evaluations must be greater than 0.")

Expand Down
49 changes: 29 additions & 20 deletions src/cortecs/opt/optimize_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
import numpy as np
from tqdm import tqdm

from cortecs.fit.fit import Fitter
from cortecs.fit.fit_neural_net import *
from cortecs.fit.fit_pca import *


def optimize_pca(
max_size,
max_evaluations,
cross_section,
P,
T,
wl,
min_components=2,
max_components=4,
opac,
min_components=3,
max_components=5,
wav_ind_start=3573,
):
"""
Expand All @@ -32,40 +32,50 @@ def optimize_pca(
"""

T = opac.T
P = opac.P
wl = opac.wl
cross_section = opac.cross_section
# each axis — the wavelength index being tested and the number of components — will be tested n times.
# n * n = max_evaluations.
n_test_each_axis = math.floor(np.power(max_evaluations, 1 / 2))

n_pc_range = np.linspace(min_components, max_components, n_test_each_axis).astype(
int
)
wav_ind_range = np.linspace(0, len(wl), n_test_each_axis).astype(int)

wav_ind_range = np.linspace(wav_ind_start, len(wl) - 1, n_test_each_axis).astype(
int
)
print("len wl")
print(len(wl))
print("wl range")
print(wav_ind_range)
(
n_pc_grid,
wav_ind_grid,
) = np.meshgrid(n_pc_range, wav_ind_range)

# max_size currently isn't used.
final_errors = []
lin_samples = []
# ah. we're supposed to fit at every wavelength.
for sample in tqdm(
range(len(n_pc_range.flatten())),
desc="Optimizing neural network hyperparameters",
range(len(n_pc_grid.flatten())),
desc="Optimizing PCA hyperparameters",
):
n_pc, wav_ind = (
n_pc_grid.flatten()[sample],
wav_ind_grid.flatten()[sample],
)
xMat = prep_pca(cross_section, wav_ind=wav_ind, nc=n_pc)
fit_pca(cross_section, P, T, xMat, nc=3, wav_ind=1, savename=None)
fitter = Fitter(opac, method="pca", wav_ind=wav_ind, nc=n_pc)
try:
fitter.fit(verbose=0)

# evaluate the fit
vals, orig_vals, abs_diffs, percent_diffs = calc_metrics(
opac_obj, fitter, plot=True
)
mse = np.mean(np.square(abs_diffs))
# evaluate the fit
vals, orig_vals, abs_diffs, percent_diffs = calc_metrics(fitter, plot=False)
mse = np.mean(np.square(abs_diffs))

except ValueError as e:
mse = np.inf

final_errors += [mse]
lin_samples += [sample]
Expand All @@ -76,5 +86,4 @@ def optimize_pca(
"n_pc": n_pc_grid.flatten()[best_sample_ind],
"wav_ind": wav_ind_grid.flatten()[best_sample_ind],
}

return best_params
32 changes: 32 additions & 0 deletions src/cortecs/tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,38 @@ def test_optimize(self):
}
)

def test_optimize_pca(self):
"""
just the tutorial. once more!
:return:
"""
# reset random seed
np.random.seed(seed)
tf.random.set_seed(seed)
os.environ["PYTHONHASHSEED"] = str(seed)
random.seed(seed)
load_kwargs = {
"T_filename": self.T_filename,
"P_filename": self.P_filename,
"wl_filename": self.wl_filename,
}
opac_obj = Opac(
self.cross_sec_filename, loader="platon", load_kwargs=load_kwargs
)
fitter = Fitter(opac_obj, method="pca", wav_ind=-2, nc=3)
optimizer = Optimizer(fitter)
max_size = 1.6
max_evaluations = 4
optimizer.optimize(max_size, max_evaluations)
print(optimizer.best_params)
self.assertTrue(
optimizer.best_params
== {
"n_pc": 5,
"wav_ind": 4615,
}
)

def test_pca_temperature_axis(self):
"""
test that the PCA fit works well enough even when fit along a different axis.
Expand Down

0 comments on commit 49445dd

Please sign in to comment.