Skip to content

Commit

Permalink
Merge branch 'main' into another_integration_test
Browse files Browse the repository at this point in the history
  • Loading branch information
arjunsavel committed Feb 9, 2024
2 parents 084f4f9 + 31d90a9 commit 2cdea54
Show file tree
Hide file tree
Showing 11 changed files with 443 additions and 82 deletions.
1 change: 1 addition & 0 deletions src/cortecs/eval/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def __init__(self, opac, fitter, **eval_kwargs):
self.wl = opac.wl
self.P = opac.P
self.T = opac.T
self.fit_kwargs = fitter.fitter_kwargs

if not hasattr(fitter, "fitter_results"):
raise ValueError("Fitter must be run before being passed to an Evaluator.")
Expand Down
9 changes: 8 additions & 1 deletion src/cortecs/eval/eval_neural_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,14 @@ def feed_forward(x, n_layers, weights, biases):

# @jax.jit
def eval_neural_net(
T, P, temperatures, pressures, wavelengths, n_layers=None, weights=None, biases=None
T,
P,
temperatures=None,
pressures=None,
wavelengths=None,
n_layers=None,
weights=None,
biases=None,
):
"""
evaluates the neural network at a given temperature and pressure.
Expand Down
25 changes: 23 additions & 2 deletions src/cortecs/eval/eval_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ def eval_pca_ind_wav(temperature_ind, pressure_ind, vectors, pca_coeffs):
return xsec_val


def eval_pca(temperature, pressure, wavelength, T, P, wl, fitter_results):
def eval_pca(
temperature, pressure, wavelength, T, P, wl, fitter_results, fit_axis="pressure"
):
"""
Evaluates the PCA fit at a given temperature, pressure, and wavelength.
Expand All @@ -64,4 +66,23 @@ def eval_pca(temperature, pressure, wavelength, T, P, wl, fitter_results):
pca_vectors, pca_coeffs_all_wl = fitter_results
pca_coeffs = pca_coeffs_all_wl[wavelength_ind, :, :]

return eval_pca_ind_wav(temperature_ind, pressure_ind, pca_vectors, pca_coeffs)
# todo: figure out how to order the pressure and temperature inds!
if fit_axis == "pressure":
first_arg = temperature_ind
second_arg = pressure_ind
elif fit_axis == "temperature":
first_arg = pressure_ind
second_arg = temperature_ind
elif fit_axis == "best":
T_length = len(T)
P_length = len(P)

# todo: what if equal?
if T_length > P_length:
first_arg = pressure_ind
second_arg = temperature_ind
else:
first_arg = pressure_ind
second_arg = temperature_ind

return eval_pca_ind_wav(first_arg, second_arg, pca_vectors, pca_coeffs)
4 changes: 3 additions & 1 deletion src/cortecs/fit/fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,8 @@ def __init__(self, opac, method="pca", **fitter_kwargs):
'polynomial', 'pca', and 'neural_net'. The more complex the model, the larger the model size (i.e., potentially
the lower the compression factor), and the more likely it is to fit well.
fitter_kwargs: dict
kwargs that are passed to the fitter.
kwargs that are passed to the fitter. one kwarg, for instance, is the fit_axis: for PCA,
this determines what axis is fit against.
"""
self.opac = opac
self.fitter_kwargs = fitter_kwargs
Expand All @@ -70,6 +71,7 @@ def __init__(self, opac, method="pca", **fitter_kwargs):
self.P = self.opac.P
self.T = self.opac.T

# todo: figure out how to change the fitting...based on the fit axis?
return

def fit(self, parallel=False):
Expand Down
41 changes: 37 additions & 4 deletions src/cortecs/fit/fit_pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,14 +97,14 @@ def do_pca(cube, nc=3):
try:
xMat, s, vh, u = do_svd(standardized_cube, nc, nx)

except np.linalg.LinAlgError:
except np.linalg.LinAlgError as e:
print("SVD did not converge.")
return
raise e

return xMat, standardized_cube, s, vh, u


def fit_pca(cross_section, P, T, xMat, **kwargs):
def fit_pca(cross_section, P, T, xMat, fit_axis="pressure", **kwargs):
"""
Fits the PCA to the opacity data.
Expand All @@ -119,12 +119,41 @@ def fit_pca(cross_section, P, T, xMat, **kwargs):
-------
:beta: (nc x pixels) PCA coefficients
"""
print("shapes for everything:", cross_section.shape, P.shape, T.shape, xMat.shape)
cross_section = move_cross_section_axis(cross_section, fit_axis)

beta = fit_mlr(cross_section, xMat)
return beta


def prep_pca(cross_section, wav_ind=-1, nc=2, force_fit_constant=False):
def move_cross_section_axis(cross_section, fit_axis):
"""
todo: add docstring
:param cross_section:
:param fit_axis:
:return:
"""
fit_axis_options = ["best", "temperature", "pressure"]
if fit_axis not in fit_axis_options:
raise ValueError(f"fit_axis param must be one of: {fit_axis_options}")

# the current shape prefers pressure. let's auto-check, though
if fit_axis == "best":
# actually want SECOND longest.
longest_axis = np.argmax(cross_section[:, :, 0].shape)

# now move that longest axis to 0.
cross_section = np.moveaxis(cross_section, longest_axis, 1)

elif fit_axis == "temperature":
cross_section = np.moveaxis(cross_section, 0, 1)

return cross_section


def prep_pca(
cross_section, wav_ind=-1, nc=2, force_fit_constant=False, fit_axis="pressure"
):
"""
Prepares the opacity data for PCA. That is, it calculates the PCA components to be fit along the entire
dataset by fitting the PCA to a single wavelength.
Expand All @@ -144,11 +173,15 @@ def prep_pca(cross_section, wav_ind=-1, nc=2, force_fit_constant=False):
:force_fit_constant: (bool) if True, will allow the PCA to fit an opacity function without temperature and pressure
dependence. This usually isn't recommended, if these PCA vectors are to e used to fit other wavelengths that
*do* have temperature and pressure dependence.
:fit_axis: (str) the axis to fit against. determines the shape of the final vectors and components.
if "best", chooses the largest axis. otherwise, can select "temperature" or "pressure".
Returns
-------
:xMat: (n_exp x nc) PCA components
"""
cross_section = move_cross_section_axis(cross_section, fit_axis)

single_pres_single_temp = cross_section[:, :, wav_ind]
if (
np.all(single_pres_single_temp == single_pres_single_temp[0, 0])
Expand Down
94 changes: 36 additions & 58 deletions src/cortecs/opac/chunking.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,16 +24,24 @@
"""

import os
from glob import glob

import numpy as np
from tqdm import tqdm

from cortecs.opac.opac import *


def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths=False):
def chunk_wavelengths(
file,
nchunks=None,
wav_per_chunk=None,
adjust_wavelengths=False,
loader="exotransmit",
):
"""
Performs wavelength-chunking.
todo: yell if the chunked results are already in the directory?
Inputs
-------
Expand Down Expand Up @@ -66,7 +74,7 @@ def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths
)

if not wav_per_chunk:
opac = Opac(file, loader="exotransmit")
opac = Opac(file, loader=loader)
num_wavelengths = len(opac.wl)
del opac # clean it up
wav_per_chunk = round(num_wavelengths / nchunks)
Expand All @@ -82,21 +90,27 @@ def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths
file_suffix = 0

# read through all lines in the opacity file. todo: from already read in opacity?
for x in tqdm(f1):
# past the header
write_to_file(header, file, file_suffix)
for x in tqdm(f1[2:]):
# print(x)
if not x:
continue
commad = x.replace(" ", ",")

# pdb.set_trace()
if len(np.array([eval(commad)]).flatten()) == 1: # if a wavelength line
ticker += 1
elif len(x.split(" ")) == 48 and adjust_wavelengths: # this is ntemp, I believe
x = adjust_wavelength_unit(x, 1e-4, style="full")
# pass # don't need to adust wavelengths anymore!

if ticker == wav_per_chunk:
print(x)
# print(file_suffix)
# elif len(x.split(" ")) == 48 and adjust_wavelengths: # this is ntemp, I believe
# x = adjust_wavelength_unit(x, 1e-4, style="full")
# # pass # don't need to adust wavelengths anymore!
if ticker == wav_per_chunk + 1:
file_suffix += 1 # start writing to different file

ticker = 0
write_to_file(header, file, file_suffix)

write_to_file(x, file, file_suffix)

f.close()
Expand Down Expand Up @@ -191,49 +205,6 @@ def add_lams(max_lam_to_add_ind, file, next_file):
return


def add_previous(num_to_add, file, previous_file):
"""
Adds a certain number of wavelength points to a file from a previous one.
Inputs:
:num_to_add: (int) number of wavelength points to add from one file to the other.
:file: (str) (str) path to file to which wavelength points are being *added*.
:previous_file: (str) path to file from which wavelength points are being drawn.
Outputs:
None
Side effects:
Modifies file.
"""
try:
f = open(previous_file)
except FileNotFoundError:
print(f"{previous_file} not found. Moving on!")
return

f1 = f.readlines()[2:] # first two files of opacity are header info
f.close()

ticker = 0

# read through all lines in the opacity file
for x in f1[::-1]:
if not x:
continue

commad = x.replace(" ", ",")
if len(np.array([eval(commad)]).flatten()) == 1: # if a wavelength line
ticker += 1

# append line to file
f2 = open(file, "a")
f2.write(x)
f2.close()
if ticker == num_to_add:
return


def add_overlap(filename, v_max=11463.5):
"""
Adds overlap from file n+1 to file n. The last file has nothing added to it. This
Expand All @@ -257,10 +228,14 @@ def add_overlap(filename, v_max=11463.5):
Side effects:
Modifies every 'filename*.dat' file.
"""
for i in tqdm(
range(len(os.listdir()[:-1])), position=0, leave=True
print("for sure adding overlap")
files = glob(filename + "*.dat")
for i, file in tqdm(
enumerate(files), total=len(files), position=0, leave=True, desc="eeeeee"
): # don't include the last file
file = filename + str(i) + ".dat"
if file == filename + ".dat":
continue # only add over lap to the chunked file
print("for sure adding overlapppp")

next_file = filename + str(i + 1) + ".dat"

Expand All @@ -282,8 +257,11 @@ def add_overlap(filename, v_max=11463.5):
delta_lam = 2 * max_curr_lam * v_max / c # delta_lambda/lambda = v/c

# add another 20 indices to be safe!
max_lam_to_add_ind = (
np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam))) + 20
)
# max_lam_to_add_ind = (
# np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam))) + 20
# )
max_lam_to_add_ind = np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam)))
print("max lam to add")
print(max_lam_to_add_ind)

add_lams(max_lam_to_add_ind, file, next_file)
Loading

0 comments on commit 2cdea54

Please sign in to comment.