Merge branch 'main' into another_integration_test

arjunsavel · Feb 9, 2024 · 2cdea54 · 2cdea54
2 parents 084f4f9 + 31d90a9
commit 2cdea54
Show file tree

Hide file tree

Showing 11 changed files with 443 additions and 82 deletions.
diff --git a/src/cortecs/eval/eval.py b/src/cortecs/eval/eval.py
@@ -28,6 +28,7 @@ def __init__(self, opac, fitter, **eval_kwargs):
         self.wl = opac.wl
         self.P = opac.P
         self.T = opac.T
+        self.fit_kwargs = fitter.fitter_kwargs
 
         if not hasattr(fitter, "fitter_results"):
             raise ValueError("Fitter must be run before being passed to an Evaluator.")

diff --git a/src/cortecs/eval/eval_neural_net.py b/src/cortecs/eval/eval_neural_net.py
@@ -61,7 +61,14 @@ def feed_forward(x, n_layers, weights, biases):
 
 # @jax.jit
 def eval_neural_net(
-    T, P, temperatures, pressures, wavelengths, n_layers=None, weights=None, biases=None
+    T,
+    P,
+    temperatures=None,
+    pressures=None,
+    wavelengths=None,
+    n_layers=None,
+    weights=None,
+    biases=None,
 ):
     """
     evaluates the neural network at a given temperature and pressure.

diff --git a/src/cortecs/eval/eval_pca.py b/src/cortecs/eval/eval_pca.py
@@ -40,7 +40,9 @@ def eval_pca_ind_wav(temperature_ind, pressure_ind, vectors, pca_coeffs):
     return xsec_val
 
 
-def eval_pca(temperature, pressure, wavelength, T, P, wl, fitter_results):
+def eval_pca(
+    temperature, pressure, wavelength, T, P, wl, fitter_results, fit_axis="pressure"
+):
     """
     Evaluates the PCA fit at a given temperature, pressure, and wavelength.
 
@@ -64,4 +66,23 @@ def eval_pca(temperature, pressure, wavelength, T, P, wl, fitter_results):
     pca_vectors, pca_coeffs_all_wl = fitter_results
     pca_coeffs = pca_coeffs_all_wl[wavelength_ind, :, :]
 
-    return eval_pca_ind_wav(temperature_ind, pressure_ind, pca_vectors, pca_coeffs)
+    # todo: figure out how to order the pressure and temperature inds!
+    if fit_axis == "pressure":
+        first_arg = temperature_ind
+        second_arg = pressure_ind
+    elif fit_axis == "temperature":
+        first_arg = pressure_ind
+        second_arg = temperature_ind
+    elif fit_axis == "best":
+        T_length = len(T)
+        P_length = len(P)
+
+        # todo: what if equal?
+        if T_length > P_length:
+            first_arg = pressure_ind
+            second_arg = temperature_ind
+        else:
+            first_arg = pressure_ind
+            second_arg = temperature_ind
+
+    return eval_pca_ind_wav(first_arg, second_arg, pca_vectors, pca_coeffs)
diff --git a/src/cortecs/fit/fit.py b/src/cortecs/fit/fit.py
@@ -54,7 +54,8 @@ def __init__(self, opac, method="pca", **fitter_kwargs):
             'polynomial', 'pca', and 'neural_net'. The more complex the model, the larger the model size (i.e., potentially
             the lower the compression factor), and the more likely it is to fit well.
         fitter_kwargs: dict
-            kwargs that are passed to the fitter.
+            kwargs that are passed to the fitter. one kwarg, for instance, is the fit_axis: for PCA,
+            this determines what axis is fit against.
         """
         self.opac = opac
         self.fitter_kwargs = fitter_kwargs
@@ -70,6 +71,7 @@ def __init__(self, opac, method="pca", **fitter_kwargs):
         self.P = self.opac.P
         self.T = self.opac.T
 
+        # todo: figure out how to change the fitting...based on the fit axis?
         return
 
     def fit(self, parallel=False):

diff --git a/src/cortecs/fit/fit_pca.py b/src/cortecs/fit/fit_pca.py
@@ -97,14 +97,14 @@ def do_pca(cube, nc=3):
     try:
         xMat, s, vh, u = do_svd(standardized_cube, nc, nx)
 
-    except np.linalg.LinAlgError:
+    except np.linalg.LinAlgError as e:
         print("SVD did not converge.")
-        return
+        raise e
 
     return xMat, standardized_cube, s, vh, u
 
 
-def fit_pca(cross_section, P, T, xMat, **kwargs):
+def fit_pca(cross_section, P, T, xMat, fit_axis="pressure", **kwargs):
     """
     Fits the PCA to the opacity data.
 
@@ -119,12 +119,41 @@ def fit_pca(cross_section, P, T, xMat, **kwargs):
     -------
         :beta: (nc x pixels) PCA coefficients
     """
+    print("shapes for everything:", cross_section.shape, P.shape, T.shape, xMat.shape)
+    cross_section = move_cross_section_axis(cross_section, fit_axis)
 
     beta = fit_mlr(cross_section, xMat)
     return beta
 
 
-def prep_pca(cross_section, wav_ind=-1, nc=2, force_fit_constant=False):
+def move_cross_section_axis(cross_section, fit_axis):
+    """
+    todo: add docstring
+    :param cross_section:
+    :param fit_axis:
+    :return:
+    """
+    fit_axis_options = ["best", "temperature", "pressure"]
+    if fit_axis not in fit_axis_options:
+        raise ValueError(f"fit_axis param must be one of: {fit_axis_options}")
+
+    # the current shape prefers pressure. let's auto-check, though
+    if fit_axis == "best":
+        # actually want SECOND longest.
+        longest_axis = np.argmax(cross_section[:, :, 0].shape)
+
+        # now move that longest axis to 0.
+        cross_section = np.moveaxis(cross_section, longest_axis, 1)
+
+    elif fit_axis == "temperature":
+        cross_section = np.moveaxis(cross_section, 0, 1)
+
+    return cross_section
+
+
+def prep_pca(
+    cross_section, wav_ind=-1, nc=2, force_fit_constant=False, fit_axis="pressure"
+):
     """
     Prepares the opacity data for PCA. That is, it calculates the PCA components to be fit along the entire
     dataset by fitting the PCA to a single wavelength.
@@ -144,11 +173,15 @@ def prep_pca(cross_section, wav_ind=-1, nc=2, force_fit_constant=False):
         :force_fit_constant: (bool) if True, will allow the PCA to fit an opacity function without temperature and pressure
         dependence. This usually isn't recommended, if these PCA vectors are to e used to fit other wavelengths that
         *do* have temperature and pressure dependence.
+        :fit_axis: (str) the axis to fit against. determines the shape of the final vectors and components.
+        if "best", chooses the largest axis. otherwise, can select "temperature" or "pressure".
 
     Returns
     -------
         :xMat: (n_exp x nc) PCA components
     """
+    cross_section = move_cross_section_axis(cross_section, fit_axis)
+
     single_pres_single_temp = cross_section[:, :, wav_ind]
     if (
         np.all(single_pres_single_temp == single_pres_single_temp[0, 0])

diff --git a/src/cortecs/opac/chunking.py b/src/cortecs/opac/chunking.py
@@ -24,16 +24,24 @@
 """
 
 import os
+from glob import glob
 
 import numpy as np
 from tqdm import tqdm
 
 from cortecs.opac.opac import *
 
 
-def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths=False):
+def chunk_wavelengths(
+    file,
+    nchunks=None,
+    wav_per_chunk=None,
+    adjust_wavelengths=False,
+    loader="exotransmit",
+):
     """
     Performs wavelength-chunking.
+    todo: yell if the chunked results are already in the directory?
 
     Inputs
     -------
@@ -66,7 +74,7 @@ def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths
         )
 
     if not wav_per_chunk:
-        opac = Opac(file, loader="exotransmit")
+        opac = Opac(file, loader=loader)
         num_wavelengths = len(opac.wl)
         del opac  # clean it up
         wav_per_chunk = round(num_wavelengths / nchunks)
@@ -82,21 +90,27 @@ def chunk_wavelengths(file, nchunks=None, wav_per_chunk=None, adjust_wavelengths
     file_suffix = 0
 
     # read through all lines in the opacity file. todo: from already read in opacity?
-    for x in tqdm(f1):
+    # past the header
+    write_to_file(header, file, file_suffix)
+    for x in tqdm(f1[2:]):
+        # print(x)
         if not x:
             continue
         commad = x.replace(" ", ",")
-
+        # pdb.set_trace()
         if len(np.array([eval(commad)]).flatten()) == 1:  # if a wavelength line
             ticker += 1
-        elif len(x.split(" ")) == 48 and adjust_wavelengths:  # this is ntemp, I believe
-            x = adjust_wavelength_unit(x, 1e-4, style="full")
-        #                pass # don't need to adust wavelengths anymore!
-
-        if ticker == wav_per_chunk:
+            print(x)
+            # print(file_suffix)
+        # elif len(x.split(" ")) == 48 and adjust_wavelengths:  # this is ntemp, I believe
+        #     x = adjust_wavelength_unit(x, 1e-4, style="full")
+        # #                pass # don't need to adust wavelengths anymore!
+        if ticker == wav_per_chunk + 1:
             file_suffix += 1  # start writing to different file
+
             ticker = 0
             write_to_file(header, file, file_suffix)
+
         write_to_file(x, file, file_suffix)
 
     f.close()
@@ -191,49 +205,6 @@ def add_lams(max_lam_to_add_ind, file, next_file):
             return
 
 
-def add_previous(num_to_add, file, previous_file):
-    """
-    Adds a certain number of wavelength points to a file from a previous one.
-
-    Inputs:
-        :num_to_add: (int) number of wavelength points to add from one file to the other.
-        :file: (str) (str) path to file to which wavelength points are being *added*.
-        :previous_file: (str) path to file from which wavelength points are being drawn.
-
-    Outputs:
-        None
-
-    Side effects:
-        Modifies file.
-    """
-    try:
-        f = open(previous_file)
-    except FileNotFoundError:
-        print(f"{previous_file} not found. Moving on!")
-        return
-
-    f1 = f.readlines()[2:]  # first two files of opacity are header info
-    f.close()
-
-    ticker = 0
-
-    # read through all lines in the opacity file
-    for x in f1[::-1]:
-        if not x:
-            continue
-
-        commad = x.replace(" ", ",")
-        if len(np.array([eval(commad)]).flatten()) == 1:  # if a wavelength line
-            ticker += 1
-
-        # append line to file
-        f2 = open(file, "a")
-        f2.write(x)
-        f2.close()
-        if ticker == num_to_add:
-            return
-
-
 def add_overlap(filename, v_max=11463.5):
     """
     Adds overlap from file n+1 to file n. The last file has nothing added to it. This
@@ -257,10 +228,14 @@ def add_overlap(filename, v_max=11463.5):
     Side effects:
         Modifies every 'filename*.dat' file.
     """
-    for i in tqdm(
-        range(len(os.listdir()[:-1])), position=0, leave=True
+    print("for sure adding overlap")
+    files = glob(filename + "*.dat")
+    for i, file in tqdm(
+        enumerate(files), total=len(files), position=0, leave=True, desc="eeeeee"
     ):  # don't include the last file
-        file = filename + str(i) + ".dat"
+        if file == filename + ".dat":
+            continue  # only add over lap to the chunked file
+        print("for sure adding overlapppp")
 
         next_file = filename + str(i + 1) + ".dat"
 
@@ -282,8 +257,11 @@ def add_overlap(filename, v_max=11463.5):
         delta_lam = 2 * max_curr_lam * v_max / c  # delta_lambda/lambda = v/c
 
         # add another 20 indices to be safe!
-        max_lam_to_add_ind = (
-            np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam))) + 20
-        )
+        # max_lam_to_add_ind = (
+        #     np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam))) + 20
+        # )
+        max_lam_to_add_ind = np.argmin(np.abs(next_lams - (max_curr_lam + delta_lam)))
+        print("max lam to add")
+        print(max_lam_to_add_ind)
 
         add_lams(max_lam_to_add_ind, file, next_file)