diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index d88f9c7..973f71c 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -41,7 +41,7 @@ jobs: - name: Test with pytest run: | python3 -m pytest - python3 -m coverage run --source=./RadClass/ -m pytest + python3 -m coverage run --source=./RadClass/,./models/,./scripts/ -m pytest python3 -m coverage report python3 -m coverage html COVERALLS_REPO_TOKEN=${{ secrets.COVERALLS_REPO_TOKEN }} python3 -m coveralls --service=github diff --git a/.gitignore b/.gitignore index a2c6179..f8b2ece 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,10 @@ __pycache__ *.h5 *.ipynb *.csv +*.joblib +*.log +*.png +*.pyc +results/ +data/ +checkpoint/ diff --git a/README.md b/README.md index b08bd07..40ed506 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,14 @@ Versions 3.6-3.9 are currently supported by tests. The following Python packages * h5py * numpy * progressbar2 +* matplotlib +* seaborn * scipy +* sklearn +* hyperopt +* ray[tune] +* torch +* shadow-ssml Modules can be imported from the repository directory (e.g. `from RadClass.H0 import H0`) or `RadClass` can be installed using pip: diff --git a/RadClass/models/LogReg.py b/RadClass/models/LogReg.py new file mode 100644 index 0000000..42bfec8 --- /dev/null +++ b/RadClass/models/LogReg.py @@ -0,0 +1,170 @@ +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# sklearn models +from sklearn import linear_model +# diagnostics +from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score +from scripts.utils import run_hyperopt +import joblib + + +class LogReg: + ''' + Methods for deploying sklearn's logistic regression + implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + alpha: float; weight for encouraging high recall + beta: float; weight for encouraging high precision + NOTE: if alpha=beta=0, default to favoring balanced accuracy. + random_state: int/float for reproducible intiailization. + ''' + + # only binary so far + def __init__(self, params=None, alpha=0, beta=0, random_state=0): + # defaults to a fixed value for reproducibility + self.random_state = random_state + # dictionary of parameters for logistic regression model + self.alpha, self.beta = alpha, beta + self.params = params + if self.params is None: + self.model = linear_model.LogisticRegression( + random_state=self.random_state + ) + else: + self.model = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=params['max_iter'], + tol=params['tol'], + C=params['C'] + ) + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh logistic regression model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, and testy required. 
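+            Example (illustrative; X_train/y_train/X_test/y_test are
+            placeholders for user-supplied numpy arrays and model is a
+            LogReg instance):
+                data_dict = {'trainx': X_train, 'trainy': y_train,
+                             'testx': X_test, 'testy': y_test}
+                results = model.fresh_start({'max_iter': 1000, 'tol': 1e-4,
+                                             'C': 1.0}, data_dict)
+            The returned 'loss' entry is the quantity minimized by hyperopt
+            during optimize().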
+ ''' + + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + + # supervised logistic regression + clf = LogReg(params=params, random_state=self.random_state) + # train and test model + clf.train(trainx, trainy) + # uses balanced_accuracy accounts for class imbalanced data + clf_pred, acc = clf.predict(testx, testy) + rec = recall_score(testy, clf_pred) + prec = precision_score(testy, clf_pred) + + # loss function minimizes misclassification + # by maximizing metrics + return {'score': acc+(self.alpha*rec)+(self.beta*prec), + 'loss': (1-acc) + self.alpha*(1-rec)+self.beta*(1-prec), + 'model': clf, + 'params': params, + 'accuracy': acc, + 'precision': prec, + 'recall': rec} + + def optimize(self, space, data_dict, max_evals=50, njobs=4, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. + Inputs: + space: a raytune compliant dictionary with defined optimization + spaces. For example: + space = {'max_iter': tune.qrandint(10, 10000, 10), + 'tol' : tune.loguniform(1e-5, 1e-1), + 'C' : tune.uniform(0.001, 1000.0) + } + See hyperopt docs for more information. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, testy required. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: boolean. If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + njobs=njobs, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy): + ''' + Wrapper method for sklearn's logisitic regression training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + ''' + + # supervised logistic regression + self.model.fit(trainx, trainy) + + def predict(self, testx, testy=None): + ''' + Wrapper method for sklearn's logistic regression predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. + ''' + + pred = self.model.predict(testx) + + acc = None + if testy is not None: + # uses balanced_accuracy_score to account for class imbalance + acc = balanced_accuracy_score(testy, pred) + + return pred, acc + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. 
+ Added to filename if not included as input. + ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/RadClass/models/PyTorch/__init__.py b/RadClass/models/PyTorch/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/RadClass/models/PyTorch/ann.py b/RadClass/models/PyTorch/ann.py new file mode 100644 index 0000000..5b000af --- /dev/null +++ b/RadClass/models/PyTorch/ann.py @@ -0,0 +1,385 @@ +from typing import Union, Tuple + +import numpy as np +import pandas as pd + +from sklearn.metrics import r2_score + +# import sys +# import os +# sys.path.append(os.getcwd()+'/models/PyTorch/') +from .critic import MSELoss + +import torch +from torch import nn +import torch.nn.functional as F + + +class EarlyStopper: + ''' + Early stopping mechanism for neural networks. + Code adapted from user "isle_of_gods" from StackOverflow: + https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch + Use this class to break a training loop if the validation loss is low. + Inputs: + patience: integer; forces stop if validation loss has not improved + for some time + min_delta: "fudge value" for how much loss to tolerate before stopping + ''' + + def __init__(self, patience=5, min_delta=0): + self.patience = patience + self.min_delta = min_delta + self.counter = 0 + self.min_validation_loss = np.inf + + def early_stop(self, validation_loss): + ''' + Tests for the early stopping condition if the validation loss + has not improved for a certain period of time (patience). + Inputs: + validation_loss: typically a float value for the loss function of + a neural network training loop + ''' + + if validation_loss < self.min_validation_loss: + # keep track of the smallest validation loss + # if it has been beaten, restart patience + self.min_validation_loss = validation_loss + self.counter = 0 + elif validation_loss > (self.min_validation_loss + self.min_delta): + # keep track of whether validation loss has been decreasing + # by a tolerable amount + self.counter += 1 + return self.counter >= self.patience + + +class ConvNN(nn.Module): + ''' + Neural Network constructor. + Also includes method for forward pass. + nn.Module: PyTorch object for neural networks. + Inputs: + layer1: int length for first layer. + layer2: int length for second layer. + Ideally a multiple of layer1. + layer3: int length for third layer. + Ideally a multiple of layer2. + kernel: convolutional kernel size. + NOTE: An optimal value is unclear for spectral data. + drop_rate: float (<1.) probability for reset/dropout layer. + length: single instance data length. + NOTE: Assumed to be 1000 for spectral data. + TODO: Allow hyperopt to optimize on arbitrary sized networks. 
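+        Example (illustrative values; the call follows the constructor
+        signature implemented below rather than the layer1/layer2/layer3
+        names listed above):
+            net = ConvNN(dim=1000, mid=[32, 64], kernel=3, n_layers=2,
+                         dropout_rate=0.1, n_epochs=500, n_classes=2)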
+ ''' + + def __init__(self, dim: int, mid: Union[int, list], kernel: int = 3, + n_layers: int = 1, dropout_rate: float = 1., + n_epochs: int = 1000, out_bias: bool = False, + criterion: nn.Module = MSELoss(), n_classes: int = None): + super().__init__() + activation = nn.ReLU + self.criterion = criterion + self.p = dropout_rate + self.n_epochs = n_epochs + self.mid = mid + # default max_pool1d kernel set by Shadow MNIST example + # NOTE: max_pool1d sets mp_kernel = mp_stride + self.mp_kernel = 2 + if isinstance(mid, list) and len(mid) == 1 and n_layers > 1: + mid = np.full(n_layers, mid[0]) + # if isinstance(mid, list) and (len(mid) != n_layers): + if len(mid) != n_layers: + raise ValueError('Specified layer architecture (mid)' + + 'should match n_layers') + if isinstance(mid, int): + mid = np.full(n_layers, mid) + layers = [nn.Sequential(nn.Conv1d(1, mid[0], kernel, 1), + activation(), + nn.MaxPool1d(kernel_size=self.mp_kernel))] + + for i in range(n_layers-1): + # max pooling after every convolution layer + layers.append(nn.Sequential(nn.Conv1d(mid[i], + mid[i+1], + kernel, 1), + activation(), + nn.MaxPool1d( + kernel_size=self.mp_kernel))) + # dropout, and flatten after convolutions + # layers.append(nn.MaxPool1d(kernel_size=self.mp_kernel)) + layers.append(nn.Dropout(dropout_rate)) + layers.append(nn.Flatten(1)) + self.m = nn.ModuleList(layers) + if n_classes is not None: + self.out = nn.Linear(mid[-1], n_classes, bias=out_bias) + else: + self.out = None + + # COMPUTE FLATTENED PARAMETERS FOR CNN + # calculating the number of parameters/weights before the flattened + # fully-connected layer: + # first, there are two convolution layers, so the output length is + # the input length (feature_vector.shape[0] - 2_layers*(kernel-1)) + # if, in the future, more layers are desired, 2 must be adjusted + # next, calculate the output of the max_pool1d layer, which is + # round((conv_out - (kernel=stride - 1) - 1)/2 + 1) + # finally, multiply this by the number of channels in the last + # convolutional layer = layer2 + # NOTE: computation for max pooling after last convolution layer + # conv_out = dim-n_layers*(kernel-1) + # self.representation_dim = mid[-1]*self.pooling(conv_out) + + conv_out = dim + for i in range(len(mid)): + conv_out -= (kernel-1) + conv_out = self.pooling(conv_out) + self.representation_dim = mid[-1]*conv_out + # self.var = nn.Linear(mid, 1, bias=out_bias) + + optimizer_kwargs = dict(lr=0.001, betas=(0.8, 0.99), weight_decay=1e-6) + self.optimizer = torch.optim.AdamW(self.parameters(), + **optimizer_kwargs) + + def pooling(self, conv_out): + return ((conv_out - (self.mp_kernel - 1) - 1 + )//self.mp_kernel) + 1 + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + for m in self.m: + # x = F.dropout(m(x), p=self.p, training=True) + x = m(x.float()) + if self.out is not None: + return self.out(x.float()) # , self.var(x) + else: + return x.float() + + # def predict(self, x:torch.Tensor, n_samp:int=25, l2:bool=True): + def predict(self, X: torch.Tensor): + """ return predictions of the model + and the predicted model uncertainties """ + if isinstance(X, pd.DataFrame): + X = torch.tensor(X.values) + elif isinstance(X, np.ndarray): + X = torch.from_numpy(X) + # out = [self.forward(x) for _ in range(x.shape[0])] + # out = [self(x) for _ in range(x.shape[0])] + out = [self(x) for x in X] + yhat = torch.stack([o[0] for o in out]).detach().cpu() + # s = torch.stack([o[1] for o in out]).detach().cpu() + # e, a = calc_uncertainty(yhat, s, l2) + # return 
(torch.mean(yhat, dim=0), torch.mean(s, dim=0), e, a) + return yhat + + def score(self, X: torch.Tensor, y: torch.Tensor): + ''' + NOTE: REGRESSION SCORE + ''' + if type(X) != torch.Tensor: + X = torch.Tensor(X) + yhat = self.predict(X) + + return r2_score(np.array(y), np.array(yhat)) + + def fit(self, train_loader, valid_loader): + self.device = torch.device('cpu') + stopper = EarlyStopper(patience=int(0.02*self.n_epochs), min_delta=0) + train_losses, valid_losses = [], [] + for t in range(1, self.n_epochs+1): + # training + # t_losses, t_ep, t_al, t_sb = [], [], [], [] + t_losses = [] + self.train() + for i, (x, y) in enumerate(train_loader): + x, y = x.to(self.device), y.to(self.device) + self.optimizer.zero_grad() + out = self(x) + loss = self.criterion(out, y) + t_losses.append(loss.item()) + loss.backward() + self.optimizer.step() + # if i % unc_rate == 0: + # _, _, ep, al, sb = get_metrics(self, x, y, + # n_samp, use_l2, eps) + # t_ep.append(ep); t_al.append(al); t_sb.append(sb) + train_losses.append(t_losses) + # t_ep_unc.append(t_ep) + # t_al_unc.append(t_al) + # t_sb_unc.append(t_sb) + + # validation + # v_losses, v_ep, v_al, v_sb = [], [], [], [] + v_losses = [] + self.eval() + with torch.no_grad(): + for i, (x, y) in enumerate(valid_loader): + x = x.to(self.device) + # loss, out, ep, al, sb = get_metrics(self, x, y, + # n_samp, use_l2, eps) + out = self.predict(x) + loss = self.criterion(out, y.detach().cpu()) + v_losses.append(loss.item()) + # v_ep.append(ep); v_al.append(al); v_sb.append(sb) + valid_losses.append(v_losses) + # v_ep_unc.append(v_ep) + # v_al_unc.append(v_al) + # v_sb_unc.append(v_sb) + + if not np.all(np.isfinite(t_losses)): + raise RuntimeError('NaN or Inf in training loss,\ + cannot recover. Exiting.') + if t % 200 == 0: + log = (f'Epoch: {t} - TL: {np.mean(t_losses):.2e},' + + ' VL: {np.mean(v_losses):.2e}, ' + f'out: {out[:5]} and y: {y[:5]}') + # f'tEU: {np.mean(t_ep):.2e}, vEU: {np.mean(v_ep):.2e}, ' + # f'tAU: {np.mean(t_al):.2e}, vAU: {np.mean(v_al):.2e}, ' + # f'tSU: {np.mean(t_sb):.2e}, vSU: {np.mean(v_sb):.2e}') + print(log) + # if use_scheduler: scheduler.step() + if stopper.early_stop(np.mean(v_losses)): + print(f'\t*-----Early stopping after {t} epochs b/c val-loss\ + ({np.mean(v_losses)}) is not improving.') + break + + return train_losses, valid_losses + + +class LinearNN(nn.Module): + def __init__(self, dim: int, mid: Union[int, list], n_layers: int = 1, + dropout_rate: float = 1., n_epochs: int = 1000, + mid_bias: bool = True, out_bias: bool = False, + criterion: nn.Module = MSELoss(), n_classes: int = None): + super().__init__() + activation = nn.ReLU + self.criterion = criterion + self.p = dropout_rate + self.n_epochs = n_epochs + if isinstance(mid, list) and len(mid) == 1 and n_layers > 1: + mid = np.full(n_layers, mid[0]) + # if isinstance(mid, list) and (len(mid) != n_layers): + if len(mid) != n_layers: + raise ValueError('Specified layer architecture (mid)' + + 'should match n_layers') + if isinstance(mid, int): + mid = np.full(n_layers, mid) + layers = [nn.Sequential(nn.Linear(dim, mid[0], bias=mid_bias), + activation())] + + for i in range(n_layers-1): + layers.append(nn.Sequential(nn.Linear(mid[i], + mid[i+1], + bias=mid_bias), + activation())) + self.m = nn.ModuleList(layers) + if n_classes is not None: + self.out = nn.Linear(mid[-1], n_classes, bias=out_bias) + else: + self.out = None + self.representation_dim = mid[-1] + # self.var = nn.Linear(mid, 1, bias=out_bias) + + optimizer_kwargs = dict(lr=0.001, betas=(0.8, 0.99), 
weight_decay=1e-6) + self.optimizer = torch.optim.AdamW(self.parameters(), + **optimizer_kwargs) + + def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]: + for m in self.m: + # x = F.dropout(m(x), p=self.p, training=True) + x = m(x.float()) + if self.out is not None: + return self.out(x.float()) # , self.var(x) + else: + return x.float() + + # def predict(self, x:torch.Tensor, n_samp:int=25, l2:bool=True): + def predict(self, X: torch.Tensor): + """ return predictions of the model + and the predicted model uncertainties """ + if isinstance(X, pd.DataFrame): + X = torch.tensor(X.values) + elif isinstance(X, np.ndarray): + X = torch.from_numpy(X) + # out = [self.forward(x) for _ in range(x.shape[0])] + # out = [self(x) for _ in range(x.shape[0])] + out = [self(x) for x in X] + yhat = torch.stack([o[0] for o in out]).detach().cpu() + # s = torch.stack([o[1] for o in out]).detach().cpu() + # e, a = calc_uncertainty(yhat, s, l2) + # return (torch.mean(yhat, dim=0), torch.mean(s, dim=0), e, a) + return yhat + + def score(self, X: torch.Tensor, y: torch.Tensor): + ''' + NOTE: REGRESSION SCORE + ''' + if type(X) != torch.Tensor: + X = torch.Tensor(X) + yhat = self.predict(X) + + return r2_score(np.array(y), np.array(yhat)) + + def fit(self, train_loader, valid_loader): + self.device = torch.device('cpu') + stopper = EarlyStopper(patience=int(0.02*self.n_epochs), min_delta=0) + train_losses, valid_losses = [], [] + for t in range(1, self.n_epochs+1): + # training + # t_losses, t_ep, t_al, t_sb = [], [], [], [] + t_losses = [] + self.train() + for i, (x, y) in enumerate(train_loader): + x, y = x.to(self.device), y.to(self.device) + self.optimizer.zero_grad() + out = self(x) + loss = self.criterion(out, y) + t_losses.append(loss.item()) + loss.backward() + self.optimizer.step() + # if i % unc_rate == 0: + # _, _, ep, al, sb = get_metrics(self, x, y, + # n_samp, use_l2, eps) + # t_ep.append(ep); t_al.append(al); t_sb.append(sb) + train_losses.append(t_losses) + # t_ep_unc.append(t_ep) + # t_al_unc.append(t_al) + # t_sb_unc.append(t_sb) + + # validation + # v_losses, v_ep, v_al, v_sb = [], [], [], [] + v_losses = [] + self.eval() + with torch.no_grad(): + for i, (x, y) in enumerate(valid_loader): + x = x.to(self.device) + # loss, out, ep, al, sb = get_metrics(self, x, y, + # n_samp, use_l2, eps) + out = self.predict(x) + loss = self.criterion(out, y.detach().cpu()) + v_losses.append(loss.item()) + # v_ep.append(ep); v_al.append(al); v_sb.append(sb) + valid_losses.append(v_losses) + # v_ep_unc.append(v_ep) + # v_al_unc.append(v_al) + # v_sb_unc.append(v_sb) + + if not np.all(np.isfinite(t_losses)): + raise RuntimeError('NaN or Inf in training loss,\ + cannot recover. 
Exiting.') + if t % 200 == 0: + log = (f'Epoch: {t} - TL: {np.mean(t_losses):.2e},' + + ' VL: {np.mean(v_losses):.2e}, ' + f'out: {out[:5]} and y: {y[:5]}') + # f'tEU: {np.mean(t_ep):.2e}, vEU: {np.mean(v_ep):.2e}, ' + # f'tAU: {np.mean(t_al):.2e}, vAU: {np.mean(v_al):.2e}, ' + # f'tSU: {np.mean(t_sb):.2e}, vSU: {np.mean(v_sb):.2e}') + print(log) + # if use_scheduler: scheduler.step() + if stopper.early_stop(np.mean(v_losses)): + print(f'\t*-----Early stopping after {t} epochs b/c val-loss\ + ({np.mean(v_losses)}) is not improving.') + break + + return train_losses, valid_losses + + ############################## diff --git a/RadClass/models/PyTorch/critic.py b/RadClass/models/PyTorch/critic.py new file mode 100644 index 0000000..2b8d268 --- /dev/null +++ b/RadClass/models/PyTorch/critic.py @@ -0,0 +1,61 @@ +import torch +from torch import nn +import torch.nn.functional as F + + +class MSELoss(nn.Module): + """ use just MSE loss with UncertainLinear network """ + def forward(self, out: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + # yhat, _ = out + # print('out: {}'.format(out)) + # print('y: {}'.format(y)) + loss = F.mse_loss(out.reshape(-1, 1), y.reshape(-1, 1)) + return loss + + +class L1Loss(nn.Module): + """ use just L1 loss with UncertainLinear network """ + def forward(self, out: torch.Tensor, y: torch.Tensor) -> torch.Tensor: + # yhat, _ = out + loss = F.smooth_l1_loss(out.reshape(-1, 1), y.reshape(-1, 1)) + return loss + + +class LinearCritic(nn.Module): + ''' + Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. + More information found here: https://github.com/ae-foster/pytorch-simclr + ''' + + def __init__(self, latent_dim, projection_dim=128, temperature=1.): + super(LinearCritic, self).__init__() + self.temperature = temperature + self.projection_dim = projection_dim + self.w1 = nn.Linear(latent_dim, latent_dim, bias=False) + self.bn1 = nn.BatchNorm1d(latent_dim) + # self.bn1 = nn.BatchNorm1d(1) + self.relu = nn.ReLU() + self.w2 = nn.Linear(latent_dim, self.projection_dim, bias=False) + self.bn2 = nn.BatchNorm1d(self.projection_dim, affine=False) + self.cossim = nn.CosineSimilarity(dim=-1) + + def project(self, h): + return self.bn2(self.w2(self.relu(self.bn1(self.w1(h))))) + + def forward(self, h1, h2): + z1, z2 = self.project(h1), self.project(h2) + sim11 = self.cossim(z1.unsqueeze(-2), + z1.unsqueeze(-3)) / self.temperature + sim22 = self.cossim(z2.unsqueeze(-2), + z2.unsqueeze(-3)) / self.temperature + sim12 = self.cossim(z1.unsqueeze(-2), + z2.unsqueeze(-3)) / self.temperature + d = sim12.shape[-1] + sim11[..., range(d), range(d)] = float('-inf') + sim22[..., range(d), range(d)] = float('-inf') + raw_scores1 = torch.cat([sim12, sim11], dim=-1) + raw_scores2 = torch.cat([sim22, sim12.transpose(-1, -2)], dim=-1) + raw_scores = torch.cat([raw_scores1, raw_scores2], dim=-2) + targets = torch.arange(2 * d, dtype=torch.long, + device=raw_scores.device) + return raw_scores, targets diff --git a/RadClass/models/PyTorch/lightModel.py b/RadClass/models/PyTorch/lightModel.py new file mode 100644 index 0000000..2deb802 --- /dev/null +++ b/RadClass/models/PyTorch/lightModel.py @@ -0,0 +1,383 @@ +import torch +import torch.nn as nn +import torch.optim as optim +# from torchlars import LARS +# from flash.core import LARS +from tqdm import tqdm + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/models/PyTorch/') +# sys.path.append(os.getcwd()+'/models/SSL/') + +from ...scripts.configs import get_datasets +from 
...scripts.evaluate import save_checkpoint, encode_train_set, train_clf, test +# from models import * +from ...scripts.scheduler import CosineAnnealingWithLinearRampLR + +from pytorch_metric_learning.losses import SelfSupervisedLoss, NTXentLoss +from pytorch_metric_learning import losses, reducers +from pytorch_metric_learning.utils import loss_and_miner_utils as lmu + +import lightning.pytorch as pl + +import numpy as np +from torchmetrics import ConfusionMatrix + +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +''' + +'''Train an encoder using Contrastive Learning.''' + + +''' Image implementation from PyTorch Lightning +class SimCLR(pl.LightningModule): + # PyTorch Lightning Implementation of SimCLR as implemented in Tutorial 13 + def __init__(self, hidden_dim, lr, temperature, + weight_decay, max_epochs=500): + super().__init__() + self.save_hyperparameters() + assert self.hparams.temperature > 0.0, "The temperature \ + must be a positive float!" + # Base model f(.) + self.convnet = torchvision.models.resnet18( + pretrained=False, num_classes=4 * hidden_dim + ) # num_classes is the output size of the last linear layer + # The MLP for g(.) 
consists of Linear->ReLU->Linear + self.convnet.fc = nn.Sequential( + self.convnet.fc, # Linear(ResNet output, 4*hidden_dim) + nn.ReLU(inplace=True), + nn.Linear(4 * hidden_dim, hidden_dim), + ) + + def configure_optimizers(self): + optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, + weight_decay=self.hparams.weight_decay) + lr_scheduler = optim.lr_scheduler.CosineAnnealingLR( + optimizer, T_max=self.hparams.max_epochs, + eta_min=self.hparams.lr / 50 + ) + return [optimizer], [lr_scheduler] + + def info_nce_loss(self, batch, mode="train"): + imgs, _ = batch + imgs = torch.cat(imgs, dim=0) + + # Encode all images + feats = self.convnet(imgs) + # Calculate cosine similarity + cos_sim = F.cosine_similarity(feats[:, None, :], + feats[None, :, :], dim=-1) + # Mask out cosine similarity to itself + self_mask = torch.eye(cos_sim.shape[0], dtype=torch.bool, + device=cos_sim.device) + cos_sim.masked_fill_(self_mask, -9e15) + # Find positive example -> batch_size//2 away from the original example + pos_mask = self_mask.roll(shifts=cos_sim.shape[0] // 2, dims=0) + # InfoNCE loss + cos_sim = cos_sim / self.hparams.temperature + nll = -cos_sim[pos_mask] + torch.logsumexp(cos_sim, dim=-1) + nll = nll.mean() + + # Logging loss + self.log(mode + "_loss", nll) + # Get ranking position of positive example + comb_sim = torch.cat( + # First position positive example + [cos_sim[pos_mask][:, None], cos_sim.masked_fill(pos_mask, -9e15)], + dim=-1, + ) + sim_argsort = comb_sim.argsort(dim=-1, descending=True).argmin(dim=-1) + # Logging ranking metrics + self.log(mode + "_acc_top1", (sim_argsort == 0).float().mean()) + self.log(mode + "_acc_top5", (sim_argsort < 5).float().mean()) + self.log(mode + "_acc_mean_pos", 1 + sim_argsort.float().mean()) + + return nll + + def training_step(self, batch, batch_idx): + return self.info_nce_loss(batch, mode="train") + + def validation_step(self, batch, batch_idx): + self.info_nce_loss(batch, mode="val") +''' + + +class LitSimCLR(pl.LightningModule): + # PyTorch Lightning Implementation of SimCLR + # as manually implemented via A E Foster + def __init__(self, clf, net, proj, critic, batch_size, sub_batch_size, lr, + momentum, cosine_anneal, num_epochs, alpha, n_classes, + test_freq, testloader, convolution, betas=(0.8, 0.99), weight_decay=1e-6): + super().__init__() + # intiialize linear classifier used in validation and testing + self.clf = clf + self.net = net + self.proj = proj + self.critic = critic + self.batch_size = batch_size + self.sub_batch_size = sub_batch_size + self.lr, self.momentum, self.cosine_anneal, self.num_epochs, self.alpha, self.n_classes, self.test_freq, self.testloader = lr, momentum, cosine_anneal, num_epochs, alpha, n_classes, test_freq, testloader + self.betas, self.weight_decay = betas, weight_decay + self.save_hyperparameters(ignore=['critic', 'proj', 'net', 'testloader']) + + # True if net is CNN + self.convolution = convolution + + # EMA update for projection head to boost performance (see SSL Cookbook) + # must use additional library: https://github.com/fadel/pytorch_ema + # self.ema = ExponentialMovingAverage(self.encoder.parameters(), decay=0.995) + + # def custom_histogram_adder(self): + # # iterating through all parameters + # for name, params in self.named_parameters(): + # self.logger.experiment.add_histogram(name, + # params, + # self.current_epoch) + + def configure_optimizers(self): + # base_optimizer = optim.SGD(list(self.net.parameters()) + # + list(self.proj.parameters()), + # # + list(self.critic.parameters()), + # 
lr=self.lr, weight_decay=1e-6, + # momentum=self.momentum) + optimizer_kwargs = dict(lr=self.lr, betas=self.betas, + weight_decay=self.weight_decay) + base_optimizer = torch.optim.AdamW(list(self.net.parameters()) + + list(self.proj.parameters()) + + list(self.critic.parameters()), + **optimizer_kwargs) + + if self.cosine_anneal: + self.scheduler = CosineAnnealingWithLinearRampLR(base_optimizer, + self.num_epochs) + # encoder_optimizer = LARS(base_optimizer, trust_coef=1e-3) + encoder_optimizer = base_optimizer + return encoder_optimizer + + # see above for EMA update + # def on_before_zero_grad(self, *args, **kwargs): + # self.ema.update(self.proj.parameters()) + + def training_step(self, batch, batch_idx): + inputs, targets, _ = batch + x1, x2 = inputs + if self.convolution: + x1, x2 = x1.unsqueeze(1), x2.unsqueeze(1) + + # graph logging + # if self.current_epoch == 0: + # self.logger.experiment.add_graph(self.net, + # torch.randn(self.batch_size, + # 1, + # 1000)) + # if (self.test_freq > 0) and (self.current_epoch % + # (self.test_freq*2) == + # ((self.test_freq*2) - 1)): + # self.custom_histogram_adder() + + # x1, x2 = x1.to(device), x2.to(device) + # encoder_optimizer.zero_grad() + representation1, representation2 = self.net(x1), self.net(x2) + # projection head for contrastive loss + # optional: instead pass representations directly; benefit? + representation1 = self.proj.project(representation1) + representation2 = self.proj.project(representation2) + # labels1 = latent_clf(representation1) + # labels2 = latent_clf(representation2) + + # semi-supervised: define labels for labeled data + # each (x1i, x2i) is a positive pair + labels = targets.detach().clone() + # -1 for the unlabeled class in targets + # providing a unique label for each unlabeled instance + # self.n_classes avoids repeating supervised class labels + labels[labels == -1] = torch.arange(self.n_classes, + len(labels[labels == -1]) + + self.n_classes) + # sub-batching to preserve memory + all_losses = [] + for s in range(0, self.batch_size, self.sub_batch_size): + # embedding/representation subset + curr_emb = representation1[s:s+self.sub_batch_size] + curr_labels = labels[s:s+self.sub_batch_size] + # apply loss across all of the second representations + curr_loss = self.critic(curr_emb, curr_labels, + ref_emb=representation2, + ref_labels=labels) + + # scaled (only) for supervised contrastive loss term + # NOTE: if multiple positive samples appear, there will be one loss + # for each positive pair (i.e. more than one loss per class). 
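+                # illustrative example of the relabeling above: with
+                # n_classes=2 and targets [0, 1, -1, -1], labels becomes
+                # [0, 1, 2, 3], so each unlabeled spectrum is its own "class"
+                # and only its augmented view forms a positive pair, while the
+                # loop below up-weights (by alpha) the per-pair losses whose
+                # positive sample carries a true label in 0..n_classes-1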
+ for c in range(self.n_classes): + if torch.any(curr_labels == c): + # check for more than one positive pair + indices = torch.where( + torch.isin(curr_loss['loss']['indices'][1], + torch.where(labels == c)[0]))[0] + # scale losses that match with indices for positive pairs + curr_loss['loss']['losses'][indices] *= self.alpha + all_losses.append(curr_loss['loss']['losses']) + # ignore 0 loss when sub_batch is not full + all_losses = [loss for loss in all_losses + if not isinstance(loss, int)] + + # summarize loss and calculate gradient + all_losses = torch.cat(all_losses, dim=0) + loss = torch.mean(all_losses) + self.log("train_loss", loss) + # loss.backward() + # encoder_optimizer.step() + # train_loss += loss.item() + # free memory used by loss graph of this batch + # del loss, all_losses, curr_loss + # x1.detach(), x2.detach() + + # return train_loss + return loss + + def validation_step(self, batch, batch_idx): + with torch.enable_grad(): + reg_weight = 1e-3 + # encode validation set + inputs, targets = batch + if self.convolution: + inputs = inputs.unsqueeze(1) + representations = self.net(inputs) + + criterion = nn.CrossEntropyLoss() + n_lbfgs_steps = 500 + # Should be reset after each epoch + # for a completely independent evaluation + self.clf = nn.Linear(representations[1].shape[0], 2) + clf_optimizer = optim.LBFGS(self.clf.parameters(), lr=1e-2) + self.clf.train() + + for i in range(n_lbfgs_steps): + def closure(): + clf_optimizer.zero_grad() + raw_scores = self.clf(representations) + loss = criterion(raw_scores, targets) + loss += reg_weight * self.clf.weight.pow(2).sum() + loss.backward(retain_graph=True) + + return loss + + clf_optimizer.step(closure) + + raw_scores = self.clf(representations) + _, predicted = raw_scores.max(1) + correct = predicted.eq(targets).sum().item() + loss = criterion(raw_scores, targets).item() + total = targets.size(0) + cmat = ConfusionMatrix(task='binary', + num_classes=self.n_classes)(predicted, + targets) + acc = 100. * correct / total + bacc = 0.5 * ((cmat[0][0] / (cmat[0][0] + cmat[0][1])) + + (cmat[1][1] / (cmat[1][1] + cmat[1][0]))) + print('Loss: %.3f | Train Acc: %.3f%% ' % + (loss, 100. * correct / targets.shape[0])) + self.log_dict({'val_acc': acc, + 'val_bacc': bacc, + 'val_tn': float(cmat[0][0]), + 'val_fp': float(cmat[0][1]), + 'val_fn': float(cmat[1][0]), + 'val_tp': float(cmat[1][1]), + 'val_loss': loss}) + + # rolling test/validation + with torch.no_grad(): + for batch_idx, batch in enumerate(self.testloader): + _, bacc = self.test_step(batch, batch_idx) + print('Test BAcc: %.3f%% ' % (bacc)) + return predicted + + def test_step(self, batch, batch_idx): + criterion = nn.CrossEntropyLoss() + test_clf_loss = 0 + correct = 0 + total = 0 + # if n_classes > 2: + # confmat = ConfusionMatrix(task='multiclass', + # num_classes=n_classes) + # cmat = torch.zeros(n_classes, n_classes) + # else: + confmat = ConfusionMatrix(task='binary', num_classes=self.n_classes) + cmat = torch.zeros(self.n_classes, self.n_classes) + inputs, targets = batch + if self.convolution: + inputs = inputs.unsqueeze(1) + representation = self.net(inputs) + # test_repr_loss = criterion(representation, targets) + raw_scores = self.clf(representation) + clf_loss = criterion(raw_scores, targets) + + test_clf_loss += clf_loss.item() + _, predicted = raw_scores.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + cmat += confmat(predicted, targets) + + print('Loss: %.3f | Test Acc: %.3f%% ' % + (test_clf_loss / (batch_idx + 1), 100. 
* correct / total)) + + acc = 100. * correct / total + bacc = 0.5 * ((cmat[0][0] / (cmat[0][0] + cmat[0][1])) + + (cmat[1][1] / (cmat[1][1] + cmat[1][0]))) + self.log_dict({'test_acc': acc, 'test_bacc': bacc, + 'test_tn': float(cmat[0][0]), + 'test_fp': float(cmat[0][1]), + 'test_fn': float(cmat[1][0]), + 'test_tp': float(cmat[1][1]), + 'test_loss': test_clf_loss}) + return predicted, bacc + + # def validation_step(self): + # X, y = encode_train_set(valloader, device, net) + # clf = train_clf(X, y, self.net.representation_dim, + # 2, reg_weight=1e-5) + # acc, bacc, cmat, test_loss = test(testloader, device, + # net, clf, num_classes) + # bacc_curve = np.append(bacc_curve, bacc) + # test_loss_curve = np.append(test_loss_curve, test_loss) + # confmat_curve = np.append(confmat_curve, cmat) + # print(f'\t-> epoch {epoch} Balanced Accuracy = {bacc}') + # print(f'\t-> with confusion matrix = {cmat}') + # if acc > best_acc: + # best_acc = acc + # save_checkpoint(net, clf, critic, epoch, + # args, os.path.basename(__file__)) + # results = {'bacc_curve': bacc_curve, + # 'train_loss_curve': train_loss_curve, + # 'test_loss_curve': test_loss_curve, + # 'confmat_curve': confmat_curve} + # joblib.dump(results, + # './checkpoint/'+args.filename+'-result_curves.joblib') diff --git a/RadClass/models/SSL/ProjHyperOpt.py b/RadClass/models/SSL/ProjHyperOpt.py new file mode 100644 index 0000000..0bb352b --- /dev/null +++ b/RadClass/models/SSL/ProjHyperOpt.py @@ -0,0 +1,446 @@ +import argparse +import os +import subprocess +import glob +import time +from itertools import combinations_with_replacement + +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +import lightning.pytorch as pl +# from torchlars import LARS + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/models/PyTorch/') +# sys.path.append(os.getcwd()+'/models/SSL/') + +from ...scripts.utils import run_hyperopt +from ...scripts.configs import get_datasets +from ..PyTorch.critic import LinearCritic +from ..PyTorch.lightModel import LitSimCLR +from ...scripts.evaluate import save_checkpoint, encode_train_set, train_clf, test +# from models import * +from ...scripts.scheduler import CosineAnnealingWithLinearRampLR +from ..PyTorch.ann import LinearNN, ConvNN + +from pytorch_metric_learning.losses import SelfSupervisedLoss, NTXentLoss +from pytorch_metric_learning import losses, reducers +from pytorch_metric_learning.utils import loss_and_miner_utils as lmu + +# hyperopt +from hyperopt.pyll.base import scope +from hyperopt import hp +from hyperopt import STATUS_OK + +import numpy as np +import joblib + +import logging + +# needed for lightning's distributed package +# os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo" +# torch.distributed.init_process_group("gloo") + +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. 
+More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +''' + +'''Train an encoder using Contrastive Learning.''' + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='PyTorch' + 'Contrastive Learning.') + parser.add_argument('--base-lr', default=0.25, type=float, + help='base learning rate, rescaled by batch_size/256') + parser.add_argument("--momentum", default=0.9, type=float, + help='SGD momentum') + parser.add_argument('--resume', '-r', type=str, default=None, + help='resume from checkpoint with this filename') + parser.add_argument('--checkpoint', type=str, default=None, + help='filename to checkpoint for resuming raytune') + parser.add_argument('--dataset', '-d', type=str, default='minos', + help='dataset keyword', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + parser.add_argument('--dfpath', '-p', type=str, + help='filepath for dataset') + parser.add_argument('--valfpath', '-v', type=str, + help='filepath for validation dataset') + parser.add_argument('--testfpath', '-t', type=str, + help='filepath for test dataset') + parser.add_argument('--bfpath', '-f', type=str, + help='filepath for background library augmentations') + parser.add_argument('--temperature', type=float, default=0.5, + help='InfoNCE temperature') + parser.add_argument("--batch-size", type=int, default=512, + help='Training batch size') + parser.add_argument("--num-epochs", type=int, default=100, + help='Number of training epochs') + parser.add_argument("--max-evals", type=int, default=50, + help='Number of raytune iterations') + parser.add_argument("--batches", type=float, default=0.75, + help='Maximum number or percent of batches per epoch.') + parser.add_argument("--cosine-anneal", action='store_true', + help="Use cosine annealing on the learning rate") + parser.add_argument("--normalization", action='store_true', + help='Use normalization instead of' + 'standardization in pre-processing.') + parser.add_argument("--accounting", action='store_true', + help='Remove estimated background before' + 'returning spectra in training.') + parser.add_argument("--convolution", action="store_true", + help="Create a CNN rather than FCNN.") + parser.add_argument("--arch", type=str, default='minos', + help='Encoder architecture', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + 
parser.add_argument("--num-workers", type=int, default=2, + help='Number of threads for data loaders') + parser.add_argument("--test-freq", type=int, default=10, + help='Frequency to fit a clf with L-BFGS for testing' + 'Not appropriate for large datasets.' + 'Set 0 to avoid classifier only training here.') + parser.add_argument("--filename", type=str, default='ckpt', + help='Output file name') + parser.add_argument('--in-dim', '-i', type=int, + help='number of input image dimensions') + parser.add_argument('--mid', '-m', type=int, nargs='+', + help='hidden layer size') + parser.add_argument('--n-layers', '-n', type=int, + help='number of hidden layers') + parser.add_argument('--n-classes', '-c', type=int, default=7, + help='number of classes/labels in projection head') + parser.add_argument('--alpha', '-a', type=float, default=1., + help='weight for semi-supervised contrastive loss') + parser.add_argument('--augs', '-u', type=str, nargs='+', default=None, + help='list of augmentations to be applied in SSL') + parser.add_argument('--trials', type=str, default=None, + help='filename for pre-existing Trials object') + parser.add_argument('--net', type=str, default=None, + help='filepath for pretrained representation model') + + args = parser.parse_args() + return args + + +def architecture(config): + # architectures = [] + if config['convolution']: + # hidden_layers = [8, 16, 32, 64, 128] + # for combination in combinations_with_replacement(hidden_layers, + # config['n_layers']): + # architectures.append(list(combination)) + # for combination in combinations_with_replacement( + # reversed(hidden_layers), config['n_layers'] + # ): + # architectures.append(list(combination)) + # return hp.choice('mid', architectures) + return np.array([np.random.choice([8, 16, 32, 64, 128]) for i in range(config['n_layers'])]) + else: + # hidden_layers = [512, 1024, 2048, 4096] + # for combination in combinations_with_replacement(hidden_layers, + # config['n_layers']): + # architectures.append(list(combination)) + # for combination in combinations_with_replacement( + # reversed(hidden_layers), config['n_layers'] + # ): + # architectures.append(list(combination)) + # return hp.choice('mid', architectures) + return np.array([np.random.choice([512, 1024, 2048, 4096]) for i in range(config['n_layers'])]) + + +def fresh_start(params, data, testset): + # device = 'cuda' if torch.cuda.is_available() else 'cpu' + device = 'cpu' + # for use with a GPU + # if device == 'cuda': + # torch.set_float32_matmul_precision('medium') + # print(f'device used={device}') + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + # params['mid'] = architecture(params) + + # Model + print('==> Building model..') + ############################################################## + # Encoder + ############################################################## + # load from checkpoint for prior trained net model + checkpoint = torch.load(params['net']) + net_dict = dict() + for key in list(checkpoint['state_dict'].keys()): + if 'net' in key: + net_key = key[4:] + net_dict[net_key] = checkpoint['state_dict'][key] + + if params['convolution']: + print('-> running a convolutional NN') + net = ConvNN(dim=params['in_dim'], mid=params['mid'], kernel=3, + n_layers=params['n_layers'], dropout_rate=0.1, + n_epochs=params['num_epochs'], out_bias=True, + n_classes=None) + elif not params['convolution']: + print('-> running a fully-connected NN') + net = LinearNN(dim=params['in_dim'], mid=params['mid'], + 
n_layers=params['n_layers'], dropout_rate=1., + n_epochs=params['num_epochs'], mid_bias=True, + out_bias=True, n_classes=None) + + net = net.load_state_dict(net_dict) + net = net.to(device) + clf = nn.Linear(net.representation_dim, params['num_classes']) + print(f'net dimensions={net.representation_dim}') + + ############################################################## + # Critic + ############################################################## + # projection head to reduce dimensionality for contrastive loss + proj_head = LinearCritic(latent_dim=net.representation_dim, + projection_dim=params['projection_dim']).to(device) + # classifier for better decision boundaries + # latent_clf = nn.Linear(proj_head.projection_dim, num_classes).to(device) + # NTXentLoss on its own requires labels (all unique) + critic = NTXentLoss(temperature=params['temperature'], + reducer=reducers.DoNothingReducer()) + sub_batch_size = 64 + + # if device == 'cuda': + # repr_dim = net.representation_dim + # net = torch.nn.DataParallel(net) + # net.representation_dim = repr_dim + # cudnn.benchmark = True + + # if args.resume: + # # Load checkpoint. + # print('==> Resuming from checkpoint..') + # assert os.path.isdir('checkpoint'), \ + # 'Error: no checkpoint directory found!' + # resume_from = os.path.join('./checkpoint', args.resume) + # checkpoint = torch.load(resume_from) + # net.load_state_dict(checkpoint['net']) + # critic.load_state_dict(checkpoint['critic']) + + # make checkpoint directory + # ckpt_path = './checkpoint/'+args.filename+'/' + # if not os.path.isdir(ckpt_path): + # os.mkdir(ckpt_path) + + # if args.resume: + # # the last version run + # last_ver = glob.glob(ckpt_path+'lightning_logs/version_*/')[-1] + # ckpt = ckpt_path + last_ver + glob.glob(last_ver+'checkpoints/*.ckpt')[-1] + # else: + # ckpt = None + + # save statistical data + # joblib.dump(trainset.mean, ckpt_path+args.filename+'-train_means.joblib') + # joblib.dump(trainset.std, ckpt_path+args.filename+'-train_stds.joblib') + + testloader = torch.utils.data.DataLoader(testset, + batch_size=len(testset), + shuffle=False, + num_workers=0, + pin_memory=data.pin_memory) + + if params['batch_size'] <= 1024: + lr = params['lr'] * (np.sqrt(params['batch_size']) / 256) + else: + lr = params['lr'] * (params['batch_size'] / 256) + + lightning_model = LitSimCLR(clf, net, proj_head, critic, + params['batch_size'], + sub_batch_size, lr, params['momentum'], + params['cosine_anneal'], params['num_epochs'], + params['alpha'], params['num_classes'], + params['test_freq'], testloader, + params['convolution'], + (params['beta1'], params['beta2']), + params['weight_decay']) + # tb_logger = pl.loggers.TensorBoardLogger(save_dir=ckpt_path) + trainer = pl.Trainer(max_epochs=params['num_epochs'], + # default_root_dir=ckpt_path, + check_val_every_n_epoch=params['test_freq'], + # profiler='simple', + limit_train_batches=params['batches'], + num_sanity_val_steps=0, + enable_checkpointing=False, + accelerator='cpu') + trainer.fit(model=lightning_model, datamodule=data) + # val_dataloaders=valloader) # , ckpt_path=args.resume) + loss = trainer.callback_metrics['train_loss'] + trainer.test(model=lightning_model, + datamodule=data) + # dataloaders=testloader) + accuracy = trainer.callback_metrics['test_bacc'] + + # loss function minimizes misclassification + # by maximizing metrics + results = { + # 'score': acc+(self.alpha*rec)+(self.beta*prec), + # 'loss': lightning_model.log['train_loss'][-1], + 'loss': loss.item() + (1-accuracy), + 'rep_loss': loss.item(), + # 
'model': lightning_model, + 'status': STATUS_OK, + 'params': params, + 'accuracy': accuracy.item(), + # 'precision': prec, + # 'recall': rec + } + + return results + + +class RadDataModule(pl.LightningDataModule): + def __init__(self, trainset, valset, testset, batch_size=512, + num_workers=0, pin_memory=False): + super().__init__() + self.batch_size = batch_size + self.num_workers = num_workers + self.pin_memory = pin_memory + self.trainset = trainset + self.valset = valset + self.testset = testset + + def train_dataloader(self): + return torch.utils.data.DataLoader(self.trainset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, + pin_memory=self.pin_memory) + + def val_dataloader(self): + return torch.utils.data.DataLoader(self.valset, + # only one batch for validation + batch_size=len(self.valset), + shuffle=False, + num_workers=0, + pin_memory=self.pin_memory) + + def test_dataloader(self): + return torch.utils.data.DataLoader(self.testset, + # only one batch for testing + batch_size=len(self.testset), + shuffle=False, + num_workers=0, + pin_memory=self.pin_memory) + + +def main(): + torch.set_printoptions(profile='full') + # eval('setattr(torch.backends.cudnn, "benchmark", True)') + logging.basicConfig(filename='debug.log', + filemode='a', + level=logging.INFO) + args = parse_arguments() + + # args.git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) + # args.git_diff = subprocess.check_output(['git', 'diff']) + + # set seed(s) for reproducibility + # torch.manual_seed(20230316) + # np.random.seed(20230316) + + print('==> Preparing data..') + # print('min-max normalization? ', args.normalization) + trainset, valset, testset, ssmlset = get_datasets(args.dataset, + args.dfpath, + args.bfpath, + args.valfpath, + args.testfpath, + args.normalization, + args.accounting, + args.augs) + print(f'ssml dataset={ssmlset}') + + if ssmlset is not None: + full_trainset = torch.utils.data.ConcatDataset([trainset, ssmlset]) + else: + full_trainset = trainset + + # device = 'cuda' if torch.cuda.is_available() else 'cpu' + device = 'cpu' + # for use with a GPU + # if device == 'cuda': + # torch.set_float32_matmul_precision('medium') + # print(f'device used={device}') + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + dataset = RadDataModule(full_trainset, valset, testset, args.batch_size, + args.num_workers, pin_memory) + + # n_layers = scope.int(hp.uniformint('n_layers', 1, 7)) + # convolution = hp.choice('convolution', [0, 1]) + space = { + 'lr': hp.uniform('lr', 1e-5, 0.5), + 'n_layers': scope.int(hp.uniformint('n_layers', 1, 4)), + 'projection_dim': scope.int(hp.uniformint('projection_dim', 8, 1024)), + # 'mid': hp.choice('mid', architecture({'n_layers': n_layers, + # 'convolution': convolution})), + 'temperature': hp.uniform('temperature', 0.1, 0.9), + 'momentum': hp.uniform('momentum', 0.5, 0.99), + 'beta1': hp.uniform('beta1', 0.7, 0.99), + 'beta2': hp.uniform('beta2', 0.8, 0.999), + 'weight_decay': hp.uniform('weight_decay', 1e-7, 1e-2), + # 'batch_size': tune.choice([128, 256, 512, 1024, 2048, 4096]),#, 8192]), + 'batch_size': args.batch_size, + 'batches': args.batches, + 'cosine_anneal': True, + 'alpha': 1., + 'num_classes': 2, + 'num_epochs': args.num_epochs, + 'test_freq': args.test_freq, + # with specified net architecture + 'in_dim': 1000, + 'mid': args.mid, + 'net_layers': args.n_layers, + 'convolution': args.convolution, + 'net': args.net + } + + # if args.checkpoint is not None: + # checkpoint = 
joblib.load(args.checkpoint) + # space['start_from_checkpoint']: put(checkpoint) + + trials = run_hyperopt(space, fresh_start, dataset, testset, + max_evals=args.max_evals, verbose=True, + trials=args.trials) + # joblib.dump(best, 'best_model.joblib') + joblib.dump(trials, args.filename+'_trials.joblib') + + +if __name__ == "__main__": + main() diff --git a/RadClass/models/SSL/SSLHyperOpt.py b/RadClass/models/SSL/SSLHyperOpt.py new file mode 100644 index 0000000..3f02198 --- /dev/null +++ b/RadClass/models/SSL/SSLHyperOpt.py @@ -0,0 +1,427 @@ +import argparse +import os +import subprocess +import glob +import time +from itertools import combinations_with_replacement + +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +import lightning.pytorch as pl +# from torchlars import LARS + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/models/PyTorch/') +# sys.path.append(os.getcwd()+'/models/SSL/') + +from ...scripts.utils import run_hyperopt +from ...scripts.configs import get_datasets +from ..PyTorch.critic import LinearCritic +from ..PyTorch.lightModel import LitSimCLR +from ...scripts.evaluate import save_checkpoint, encode_train_set, train_clf, test +# from models import * +from ...scripts.scheduler import CosineAnnealingWithLinearRampLR +from ..PyTorch.ann import LinearNN, ConvNN + +from pytorch_metric_learning.losses import SelfSupervisedLoss, NTXentLoss +from pytorch_metric_learning import losses, reducers +from pytorch_metric_learning.utils import loss_and_miner_utils as lmu + +# hyperopt +from hyperopt.pyll.base import scope +from hyperopt import hp +from hyperopt import STATUS_OK + +import numpy as np +import joblib + +import logging + +# needed for lightning's distributed package +# os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo" +# torch.distributed.init_process_group("gloo") + +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+''' + +'''Train an encoder using Contrastive Learning.''' + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='PyTorch' + 'Contrastive Learning.') + parser.add_argument('--base-lr', default=0.25, type=float, + help='base learning rate, rescaled by batch_size/256') + parser.add_argument("--momentum", default=0.9, type=float, + help='SGD momentum') + parser.add_argument('--resume', '-r', type=str, default=None, + help='resume from checkpoint with this filename') + parser.add_argument('--checkpoint', type=str, default=None, + help='filename to checkpoint for resuming raytune') + parser.add_argument('--dataset', '-d', type=str, default='minos', + help='dataset keyword', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + parser.add_argument('--dfpath', '-p', type=str, + help='filepath for dataset') + parser.add_argument('--valfpath', '-v', type=str, + help='filepath for validation dataset') + parser.add_argument('--testfpath', '-t', type=str, + help='filepath for test dataset') + parser.add_argument('--bfpath', '-f', type=str, + help='filepath for background library augmentations') + parser.add_argument('--temperature', type=float, default=0.5, + help='InfoNCE temperature') + parser.add_argument("--batch-size", type=int, default=512, + help='Training batch size') + parser.add_argument("--num-epochs", type=int, default=100, + help='Number of training epochs') + parser.add_argument("--max-evals", type=int, default=50, + help='Number of raytune iterations') + parser.add_argument("--batches", type=float, default=0.75, + help='Maximum number or percent of batches per epoch.') + parser.add_argument("--cosine-anneal", action='store_true', + help="Use cosine annealing on the learning rate") + parser.add_argument("--normalization", action='store_true', + help='Use normalization instead of' + 'standardization in pre-processing.') + parser.add_argument("--accounting", action='store_true', + help='Remove estimated background before' + 'returning spectra in training.') + parser.add_argument("--convolution", action="store_true", + help="Create a CNN rather than FCNN.") + parser.add_argument("--arch", type=str, default='minos', + help='Encoder architecture', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + parser.add_argument("--num-workers", type=int, default=2, + help='Number of threads for data loaders') + parser.add_argument("--test-freq", type=int, default=10, + help='Frequency to fit a clf with L-BFGS for testing' + 'Not appropriate for large datasets.' 
+ 'Set 0 to avoid classifier only training here.') + parser.add_argument("--filename", type=str, default='ckpt', + help='Output file name') + parser.add_argument('--in-dim', '-i', type=int, + help='number of input image dimensions') + parser.add_argument('--mid', '-m', type=int, nargs='+', + help='hidden layer size') + parser.add_argument('--n-layers', '-n', type=int, + help='number of hidden layers') + parser.add_argument('--n-classes', '-c', type=int, default=7, + help='number of classes/labels in projection head') + parser.add_argument('--alpha', '-a', type=float, default=1., + help='weight for semi-supervised contrastive loss') + parser.add_argument('--augs', '-u', type=str, nargs='+', default=None, + help='list of augmentations to be applied in SSL') + parser.add_argument('--trials', type=str, default=None, + help='filename for pre-existing Trials object') + + args = parser.parse_args() + return args + + +def architecture(config): + # architectures = [] + if config['convolution']: + # hidden_layers = [8, 16, 32, 64, 128] + # for combination in combinations_with_replacement(hidden_layers, + # config['n_layers']): + # architectures.append(list(combination)) + # for combination in combinations_with_replacement( + # reversed(hidden_layers), config['n_layers'] + # ): + # architectures.append(list(combination)) + # return hp.choice('mid', architectures) + return np.array([np.random.choice([8, 16, 32, 64, 128]) for i in range(config['n_layers'])]) + else: + # hidden_layers = [512, 1024, 2048, 4096] + # for combination in combinations_with_replacement(hidden_layers, + # config['n_layers']): + # architectures.append(list(combination)) + # for combination in combinations_with_replacement( + # reversed(hidden_layers), config['n_layers'] + # ): + # architectures.append(list(combination)) + # return hp.choice('mid', architectures) + return np.array([np.random.choice([512, 1024, 2048, 4096]) for i in range(config['n_layers'])]) + + +def fresh_start(params, data, testset): + # device = 'cuda' if torch.cuda.is_available() else 'cpu' + device = 'cpu' + # for use with a GPU + # if device == 'cuda': + # torch.set_float32_matmul_precision('medium') + # print(f'device used={device}') + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + params['mid'] = architecture(params) + + # Model + print('==> Building model..') + ############################################################## + # Encoder + ############################################################## + if params['convolution']: + print('-> running a convolutional NN') + net = ConvNN(dim=params['in_dim'], mid=params['mid'], kernel=3, + n_layers=params['n_layers'], dropout_rate=0.1, + n_epochs=params['num_epochs'], out_bias=True, + n_classes=None) + elif not params['convolution']: + print('-> running a fully-connected NN') + net = LinearNN(dim=params['in_dim'], mid=params['mid'], + n_layers=params['n_layers'], dropout_rate=1., + n_epochs=params['num_epochs'], mid_bias=True, + out_bias=True, n_classes=None) + net = net.to(device) + clf = nn.Linear(net.representation_dim, params['num_classes']) + print(f'net dimensions={net.representation_dim}') + + ############################################################## + # Critic + ############################################################## + # projection head to reduce dimensionality for contrastive loss + proj_head = LinearCritic(latent_dim=net.representation_dim).to(device) + # classifier for better decision boundaries + # latent_clf = 
nn.Linear(proj_head.projection_dim, num_classes).to(device) + # NTXentLoss on its own requires labels (all unique) + critic = NTXentLoss(temperature=params['temperature'], + reducer=reducers.DoNothingReducer()) + sub_batch_size = 64 + + # if device == 'cuda': + # repr_dim = net.representation_dim + # net = torch.nn.DataParallel(net) + # net.representation_dim = repr_dim + # cudnn.benchmark = True + + # if args.resume: + # # Load checkpoint. + # print('==> Resuming from checkpoint..') + # assert os.path.isdir('checkpoint'), \ + # 'Error: no checkpoint directory found!' + # resume_from = os.path.join('./checkpoint', args.resume) + # checkpoint = torch.load(resume_from) + # net.load_state_dict(checkpoint['net']) + # critic.load_state_dict(checkpoint['critic']) + + # make checkpoint directory + # ckpt_path = './checkpoint/'+args.filename+'/' + # if not os.path.isdir(ckpt_path): + # os.mkdir(ckpt_path) + + # if args.resume: + # # the last version run + # last_ver = glob.glob(ckpt_path+'lightning_logs/version_*/')[-1] + # ckpt = ckpt_path + last_ver + glob.glob(last_ver+'checkpoints/*.ckpt')[-1] + # else: + # ckpt = None + + # save statistical data + # joblib.dump(trainset.mean, ckpt_path+args.filename+'-train_means.joblib') + # joblib.dump(trainset.std, ckpt_path+args.filename+'-train_stds.joblib') + + testloader = torch.utils.data.DataLoader(testset, + batch_size=len(testset), + shuffle=False, + num_workers=0, + pin_memory=data.pin_memory) + + if params['batch_size'] <= 1024: + lr = params['lr'] * (np.sqrt(params['batch_size']) / 256) + else: + lr = params['lr'] * (params['batch_size'] / 256) + + lightning_model = LitSimCLR(clf, net, proj_head, critic, + params['batch_size'], + sub_batch_size, lr, params['momentum'], + params['cosine_anneal'], params['num_epochs'], + params['alpha'], params['num_classes'], + params['test_freq'], testloader, + params['convolution'], + (params['beta1'], params['beta2']), + params['weight_decay']) + # tb_logger = pl.loggers.TensorBoardLogger(save_dir=ckpt_path) + trainer = pl.Trainer(max_epochs=params['num_epochs'], + # default_root_dir=ckpt_path, + check_val_every_n_epoch=params['test_freq'], + # profiler='simple', + limit_train_batches=params['batches'], + num_sanity_val_steps=0, + enable_checkpointing=False, + accelerator='cpu') + trainer.fit(model=lightning_model, datamodule=data) + # val_dataloaders=valloader) # , ckpt_path=args.resume) + loss = trainer.callback_metrics['train_loss'] + trainer.test(model=lightning_model, + datamodule=data) + # dataloaders=testloader) + accuracy = trainer.callback_metrics['test_bacc'] + + # loss function minimizes misclassification + # by maximizing metrics + results = { + # 'score': acc+(self.alpha*rec)+(self.beta*prec), + # 'loss': lightning_model.log['train_loss'][-1], + 'loss': loss.item(), + # 'model': lightning_model, + 'status': STATUS_OK, + 'params': params, + 'accuracy': accuracy.item(), + # 'precision': prec, + # 'recall': rec + } + + return results + + +class RadDataModule(pl.LightningDataModule): + def __init__(self, trainset, valset, testset, batch_size=512, + num_workers=0, pin_memory=False): + super().__init__() + self.batch_size = batch_size + self.num_workers = num_workers + self.pin_memory = pin_memory + self.trainset = trainset + self.valset = valset + self.testset = testset + + def train_dataloader(self): + return torch.utils.data.DataLoader(self.trainset, + batch_size=self.batch_size, + shuffle=True, + num_workers=self.num_workers, + pin_memory=self.pin_memory) + + def val_dataloader(self): + return 
torch.utils.data.DataLoader(self.valset, + # only one batch for validation + batch_size=len(self.valset), + shuffle=False, + num_workers=0, + pin_memory=self.pin_memory) + + def test_dataloader(self): + return torch.utils.data.DataLoader(self.testset, + # only one batch for testing + batch_size=len(self.testset), + shuffle=False, + num_workers=0, + pin_memory=self.pin_memory) + + +def main(): + torch.set_printoptions(profile='full') + # eval('setattr(torch.backends.cudnn, "benchmark", True)') + logging.basicConfig(filename='debug.log', + filemode='a', + level=logging.INFO) + args = parse_arguments() + + # args.git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) + # args.git_diff = subprocess.check_output(['git', 'diff']) + + # set seed(s) for reproducibility + # torch.manual_seed(20230316) + # np.random.seed(20230316) + + print('==> Preparing data..') + # print('min-max normalization? ', args.normalization) + trainset, valset, testset, ssmlset = get_datasets(args.dataset, + args.dfpath, + args.bfpath, + args.valfpath, + args.testfpath, + args.normalization, + args.accounting, + args.augs) + print(f'ssml dataset={ssmlset}') + + if ssmlset is not None: + full_trainset = torch.utils.data.ConcatDataset([trainset, ssmlset]) + else: + full_trainset = trainset + + # device = 'cuda' if torch.cuda.is_available() else 'cpu' + device = 'cpu' + # for use with a GPU + # if device == 'cuda': + # torch.set_float32_matmul_precision('medium') + # print(f'device used={device}') + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + dataset = RadDataModule(full_trainset, valset, testset, args.batch_size, + args.num_workers, pin_memory) + + # n_layers = scope.int(hp.uniformint('n_layers', 1, 7)) + # convolution = hp.choice('convolution', [0, 1]) + space = { + 'lr': hp.uniform('lr', 1e-5, 0.5), + 'n_layers': scope.int(hp.uniformint('n_layers', 1, 7)), + 'convolution': hp.choice('convolution', [0, 1]), + # 'mid': hp.choice('mid', architecture({'n_layers': n_layers, + # 'convolution': convolution})), + 'temperature': hp.uniform('temperature', 0.1, 0.9), + 'momentum': hp.uniform('momentum', 0.5, 0.99), + 'beta1': hp.uniform('beta1', 0.7, 0.99), + 'beta2': hp.uniform('beta2', 0.8, 0.999), + 'weight_decay': hp.uniform('weight_decay', 1e-7, 1e-2), + # 'batch_size': tune.choice([128, 256, 512, 1024, 2048, 4096]),#, 8192]), + 'batch_size': args.batch_size, + 'batches': args.batches, + 'cosine_anneal': True, + 'alpha': 1., + 'num_classes': 2, + 'num_epochs': args.num_epochs, + 'test_freq': args.test_freq, + 'in_dim': 1000 + } + + # if args.checkpoint is not None: + # checkpoint = joblib.load(args.checkpoint) + # space['start_from_checkpoint']: put(checkpoint) + + trials = run_hyperopt(space, fresh_start, dataset, testset, + max_evals=args.max_evals, verbose=True, + trials=args.trials) + # joblib.dump(best, 'best_model.joblib') + joblib.dump(trials, args.filename+'_trials.joblib') + + +if __name__ == "__main__": + main() diff --git a/RadClass/models/SSL/SlimCLR.py b/RadClass/models/SSL/SlimCLR.py new file mode 100644 index 0000000..053e0a2 --- /dev/null +++ b/RadClass/models/SSL/SlimCLR.py @@ -0,0 +1,320 @@ +import argparse +import os +import subprocess + +import torch +import torch.backends.cudnn as cudnn +import torch.optim as optim +# from torchlars import LARS +from tqdm import tqdm + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/models/PyTorch/') +# sys.path.append(os.getcwd()+'/models/SSL/') + +from 
...scripts.configs import get_datasets +from ..PyTorch.critic import LinearCritic +from ...scripts.evaluate import save_checkpoint, encode_train_set, train_clf, test +# from models import * +from ...scripts.scheduler import CosineAnnealingWithLinearRampLR +from ..PyTorch.ann import LinearNN + +from pytorch_metric_learning.losses import SelfSupervisedLoss, NTXentLoss +from pytorch_metric_learning import losses, reducers +from pytorch_metric_learning.utils import loss_and_miner_utils as lmu + +import numpy as np +import joblib + +import logging + +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+''' + +'''Train an encoder using Contrastive Learning.''' + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='PyTorch' + 'Contrastive Learning.') + parser.add_argument('--base-lr', default=0.25, type=float, + help='base learning rate, rescaled by batch_size/256') + parser.add_argument("--momentum", default=0.9, type=float, + help='SGD momentum') + parser.add_argument('--resume', '-r', type=str, default='', + help='resume from checkpoint with this filename') + parser.add_argument('--dataset', '-d', type=str, default='minos', + help='dataset keyword', + choices=['minos', 'minos-curated', 'minos-2019', + 'minos-2019-binary', 'cifar10', 'cifar100', + 'stl10', 'imagenet']) + parser.add_argument('--dfpath', '-p', type=str, + help='filepath for dataset') + parser.add_argument('--valfpath', '-v', type=str, + help='filepath for validation dataset') + parser.add_argument('--testfpath', '-t', type=str, + help='filepath for test dataset') + parser.add_argument('--bfpath', '-f', type=str, + help='filepath for background library augmentations') + parser.add_argument('--temperature', type=float, default=0.5, + help='InfoNCE temperature') + parser.add_argument("--batch-size", type=int, default=512, + help='Training batch size') + parser.add_argument("--num-epochs", type=int, default=100, + help='Number of training epochs') + parser.add_argument("--cosine-anneal", action='store_true', + help="Use cosine annealing on the learning rate") + parser.add_argument("--arch", type=str, default='minos', + help='Encoder architecture', + choices=['minos', 'minos-curated', 'minos-2019', + 'minos-2019-binary', 'resnet18', + 'resnet34', 'resnet50']) + parser.add_argument("--num-workers", type=int, default=2, + help='Number of threads for data loaders') + parser.add_argument("--test-freq", type=int, default=10, + help='Frequency to fit a clf with L-BFGS for testing.' + 'Not appropriate for large datasets.' 
+ 'Set 0 to avoid classifier only training here.') + parser.add_argument("--filename", type=str, default='ckpt', + help='Output file name') + parser.add_argument('--in-dim', '-i', type=int, + help='number of input image dimensions') + parser.add_argument('--mid', '-m', type=int, nargs='+', + help='hidden layer size') + parser.add_argument('--n-layers', '-n', type=int, + help='number of hidden layers') + parser.add_argument('--n-classes', '-c', type=int, default=7, + help='number of classes/labels in projection head') + + args = parser.parse_args() + return args + + +def main(): + logging.basicConfig(filename='debug.log', + filemode='a', + level=logging.INFO) + args = parse_arguments() + args.lr = args.base_lr * (args.batch_size / 256) + + args.git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) + args.git_diff = subprocess.check_output(['git', 'diff']) + + device = 'cuda' if torch.cuda.is_available() else 'cpu' + best_acc = 0 # best test accuracy + start_epoch = 0 # start from epoch 0 or last checkpoint epoch + clf = None + + # set seed(s) for reproducibility + torch.manual_seed(20230316) + np.random.seed(20230316) + + print('==> Preparing data..') + num_classes = args.n_classes + trainset, valset, testset = get_datasets(args.dataset, args.dfpath, + args.bfpath, args.valfpath, + args.testfpath) + joblib.dump(trainset.mean, args.filename+'-train_means.joblib') + joblib.dump(trainset.std, args.filename+'-train_stds.joblib') + + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + trainloader = torch.utils.data.DataLoader(trainset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + pin_memory=pin_memory) + valloader = torch.utils.data.DataLoader(valset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + pin_memory=pin_memory) + testloader = torch.utils.data.DataLoader(testset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + pin_memory=pin_memory) + + # Model + print('==> Building model..') + ############################################################## + # Encoder + ############################################################## + if args.arch in ['minos', 'minos-curated', + 'minos-2019', 'minos-2019-binary']: + net = LinearNN(dim=args.in_dim, mid=args.mid, + n_layers=args.n_layers, dropout_rate=1., + n_epochs=args.num_epochs, mid_bias=True, + out_bias=True, n_classes=None) + else: + raise ValueError("Bad architecture specification") + net = net.to(device) + print(f'net dimensions={net.representation_dim}') + + ############################################################## + # Critic + ############################################################## + # projection head to reduce dimensionality for contrastive loss + proj_head = LinearCritic(latent_dim=args.mid[-1]).to(device) + # classifier for better decision boundaries + # latent_clf = nn.Linear(proj_head.projection_dim, num_classes).to(device) + # NTXentLoss on its own requires labels (all unique) + critic = NTXentLoss(temperature=0.07, reducer=reducers.DoNothingReducer()) + sub_batch_size = 64 + + if device == 'cuda': + repr_dim = net.representation_dim + net = torch.nn.DataParallel(net) + net.representation_dim = repr_dim + cudnn.benchmark = True + + if args.resume: + # Load checkpoint. + print('==> Resuming from checkpoint..') + assert os.path.isdir('checkpoint'), 'Error: no chkpt directory found!' 
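        # The loaded checkpoint is expected to provide 'net', 'critic', 'acc',
        # and 'epoch' entries, which are restored on the lines that follow.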
+ resume_from = os.path.join('./checkpoint', args.resume) + checkpoint = torch.load(resume_from) + net.load_state_dict(checkpoint['net']) + critic.load_state_dict(checkpoint['critic']) + best_acc = checkpoint['acc'] + start_epoch = checkpoint['epoch'] + + base_optimizer = optim.SGD(list(net.parameters()) + + list(proj_head.parameters()) + + list(critic.parameters()), + # + list(latent_clf.parameters()) + lr=args.lr, weight_decay=1e-6, + momentum=args.momentum) + if args.cosine_anneal: + scheduler = CosineAnnealingWithLinearRampLR(base_optimizer, + args.num_epochs) + # encoder_optimizer = LARS(base_optimizer, trust_coef=1e-3) + encoder_optimizer = base_optimizer + + # Training + def train(epoch): + print('\nEpoch: %d' % epoch) + net.train() + # critic.train() + critic.train() + train_loss = 0 + t = tqdm(enumerate(trainloader), desc='Loss: **** ', + total=len(trainloader), bar_format='{desc}{bar}{r_bar}') + for batch_idx, (inputs, _, _) in t: + x1, x2 = inputs + x1, x2 = x1.to(device), x2.to(device) + encoder_optimizer.zero_grad() + representation1, representation2 = net(x1), net(x2) + # projection head for contrastive loss + # optional: instead pass representations directly; benefit? + representation1 = proj_head.project(representation1) + representation2 = proj_head.project(representation2) + # labels1 = latent_clf(representation1) + # labels2 = latent_clf(representation2) + + # each (x1i, x2i) is a positive pair + labels = torch.arange(representation1.shape[0]) + # sub-batching to preserve memory + all_losses = [] + for s in range(0, args.batch_size, sub_batch_size): + # embedding/representation subset + curr_emb = representation1[s:s+sub_batch_size] + curr_labels = labels[s:s+sub_batch_size] + # apply loss across all of the second representations + curr_loss = critic(curr_emb, curr_labels, + ref_emb=representation2, ref_labels=labels) + all_losses.append(curr_loss['loss']['losses']) + # ignore 0 loss when sub_batch is not full + all_losses = [loss for loss in all_losses + if not isinstance(loss, int)] + + # summarize loss and calculate gradient + all_losses = torch.cat(all_losses, dim=0) + loss = torch.mean(all_losses) + loss.backward() + encoder_optimizer.step() + train_loss += loss.item() + # free memory used by loss graph of this batch + del loss, all_losses, curr_loss + x1.detach(), x2.detach() + + t.set_description('Loss: %.3f ' % (train_loss / (batch_idx + 1))) + return train_loss + + bacc_curve = np.array([]) + train_loss_curve = np.array([]) + test_loss_curve = np.array([]) + confmat_curve = np.array([]) + with torch.profiler.profile( + schedule=torch.profiler.schedule( + wait=2, + warmup=2, + active=6, + repeat=1), + on_trace_ready=torch.profiler.tensorboard_trace_handler, + with_stack=True + ): # as profiler: + for epoch in range(start_epoch, start_epoch + args.num_epochs): + train_loss = train(epoch) + train_loss_curve = np.append(train_loss_curve, train_loss) + if (args.test_freq > 0) and (epoch % args.test_freq + == (args.test_freq - 1)): + X, y = encode_train_set(valloader, device, net) + clf = train_clf(X, y, net.representation_dim, + num_classes, device, reg_weight=1e-5) + acc, bacc, cmat, test_loss = test(testloader, device, + net, clf, num_classes) + bacc_curve = np.append(bacc_curve, bacc) + test_loss_curve = np.append(test_loss_curve, test_loss) + confmat_curve = np.append(confmat_curve, cmat) + print(f'\t-> epoch {epoch} Balanced Accuracy = {bacc}') + print(f'\t-> with confusion matrix = {cmat}') + if acc > best_acc: + best_acc = acc + save_checkpoint(net, clf, 
critic, epoch, + args, os.path.basename(__file__)) + results = {'bacc_curve': bacc_curve, + 'train_loss_curve': train_loss_curve, + 'test_loss_curve': test_loss_curve, + 'confmat_curve': confmat_curve} + joblib.dump(results, + './checkpoint/' + + args.filename+'-result_curves.joblib') + elif args.test_freq == 0: + save_checkpoint(net, clf, critic, epoch, + args, os.path.basename(__file__)) + if args.cosine_anneal: + scheduler.step() + + +if __name__ == "__main__": + main() diff --git a/RadClass/models/SSL/SlimCLRLight.py b/RadClass/models/SSL/SlimCLRLight.py new file mode 100644 index 0000000..4fadf68 --- /dev/null +++ b/RadClass/models/SSL/SlimCLRLight.py @@ -0,0 +1,292 @@ +import argparse +import os +import subprocess +import glob + +import torch +import torch.nn as nn +import torch.backends.cudnn as cudnn +import lightning.pytorch as pl +# from torchlars import LARS + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/models/PyTorch/') +# sys.path.append(os.getcwd()+'/models/SSL/') + +from ...scripts.configs import get_datasets +from ..PyTorch.critic import LinearCritic +from ..PyTorch.lightModel import LitSimCLR +from ...scripts.evaluate import save_checkpoint, encode_train_set, train_clf, test +# from models import * +from ...scripts.scheduler import CosineAnnealingWithLinearRampLR +from ..PyTorch.ann import LinearNN, ConvNN + +from pytorch_metric_learning.losses import SelfSupervisedLoss, NTXentLoss +from pytorch_metric_learning import losses, reducers +from pytorch_metric_learning.utils import loss_and_miner_utils as lmu + +import numpy as np +import joblib + +import logging + +# needed for lightning's distributed package +# os.environ["PL_TORCH_DISTRIBUTED_BACKEND"] = "gloo" +# torch.distributed.init_process_group("gloo") + +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+''' + +'''Train an encoder using Contrastive Learning.''' + + +def parse_arguments(): + parser = argparse.ArgumentParser(description='PyTorch' + 'Contrastive Learning.') + parser.add_argument('--base-lr', default=0.25, type=float, + help='base learning rate, rescaled by batch_size/256') + parser.add_argument("--momentum", default=0.9, type=float, + help='SGD momentum') + parser.add_argument('--resume', '-r', type=str, default=None, + help='resume from checkpoint with this filename') + parser.add_argument('--dataset', '-d', type=str, default='minos', + help='dataset keyword', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + parser.add_argument('--dfpath', '-p', type=str, + help='filepath for dataset') + parser.add_argument('--valfpath', '-v', type=str, + help='filepath for validation dataset') + parser.add_argument('--testfpath', '-t', type=str, + help='filepath for test dataset') + parser.add_argument('--bfpath', '-f', type=str, + help='filepath for background library augmentations') + parser.add_argument('--temperature', type=float, default=0.5, + help='InfoNCE temperature') + parser.add_argument("--batch-size", type=int, default=512, + help='Training batch size') + parser.add_argument("--num-epochs", type=int, default=100, + help='Number of training epochs') + parser.add_argument("--cosine-anneal", action='store_true', + help="Use cosine annealing on the learning rate") + parser.add_argument("--normalization", action='store_true', + help='Use normalization instead of' + 'standardization in pre-processing.') + parser.add_argument("--accounting", action='store_true', + help='Remove estimated background before' + 'returning spectra in training.') + parser.add_argument("--convolution", action="store_true", + help="Create a CNN rather than FCNN.") + parser.add_argument("--arch", type=str, default='minos', + help='Encoder architecture', + choices=['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', + 'minos-2019-binary']) + parser.add_argument("--num-workers", type=int, default=2, + help='Number of threads for data loaders') + parser.add_argument("--test-freq", type=int, default=10, + help='Frequency to fit a clf with L-BFGS for testing' + 'Not appropriate for large datasets.' 
+ 'Set 0 to avoid classifier only training here.') + parser.add_argument("--filename", type=str, default='ckpt', + help='Output file name') + parser.add_argument('--in-dim', '-i', type=int, + help='number of input image dimensions') + parser.add_argument('--mid', '-m', type=int, nargs='+', + help='hidden layer size') + parser.add_argument('--n-layers', '-n', type=int, + help='number of hidden layers') + parser.add_argument('--n-classes', '-c', type=int, default=7, + help='number of classes/labels in projection head') + parser.add_argument('--alpha', '-a', type=float, default=1., + help='weight for semi-supervised contrastive loss') + parser.add_argument('--beta1', type=float, default=0.8, + help='first beta used by AdamW optimizer') + parser.add_argument('--beta2', type=float, default=0.99, + help='second beta used by AdamW optimizer') + parser.add_argument('--weight-decay', type=float, default=1e-6, + help='weight decay hyperparameter for AdamW optimizer') + parser.add_argument('--augs', '-u', type=str, nargs='+', default=None, + help='list of augmentations to be applied in SSL') + + args = parser.parse_args() + return args + + +def main(): + torch.set_printoptions(profile='full') + logging.basicConfig(filename='debug.log', + filemode='a', + level=logging.INFO) + args = parse_arguments() + if args.batch_size <= 1024: + args.lr = args.base_lr * (np.sqrt(args.batch_size) / 256) + else: + args.lr = args.base_lr * (args.batch_size / 256) + + args.git_hash = subprocess.check_output(['git', 'rev-parse', 'HEAD']) + args.git_diff = subprocess.check_output(['git', 'diff']) + + device = 'cuda' if torch.cuda.is_available() else 'cpu' + # for use with a GPU + if device == 'cuda': + torch.set_float32_matmul_precision('medium') + print(f'device used={device}') + + # set seed(s) for reproducibility + torch.manual_seed(20230316) + np.random.seed(20230316) + + print('==> Preparing data..') + print('min-max normalization? 
', args.normalization) + num_classes = args.n_classes + trainset, valset, testset, ssmlset = get_datasets(args.dataset, + args.dfpath, + args.bfpath, + args.valfpath, + args.testfpath, + args.normalization, + args.accounting, + args.augs) + print(f'ssml dataset={ssmlset}') + + pin_memory = True if device == 'cuda' else False + print(f'pin_memory={pin_memory}') + + if ssmlset is not None: + full_trainset = torch.utils.data.ConcatDataset([trainset, ssmlset]) + else: + full_trainset = trainset + trainloader = torch.utils.data.DataLoader(full_trainset, + batch_size=args.batch_size, + shuffle=True, + num_workers=args.num_workers, + pin_memory=pin_memory) + valloader = torch.utils.data.DataLoader(valset, + batch_size=args.batch_size, + shuffle=False, + # num_workers=args.num_workers, + num_workers=0, + pin_memory=pin_memory) + testloader = torch.utils.data.DataLoader(testset, + batch_size=args.batch_size, + shuffle=False, + # num_workers=args.num_workers, + num_workers=0, + pin_memory=pin_memory) + + # Model + print('==> Building model..') + ############################################################## + # Encoder + ############################################################## + if args.arch in ['minos', 'minos-ssml', 'minos-transfer-ssml', + 'minos-curated', 'minos-2019', 'minos-2019-binary']: + if args.convolution: + print('-> running a convolutional NN') + net = ConvNN(dim=args.in_dim, mid=args.mid, kernel=3, + n_layers=args.n_layers, dropout_rate=0.1, + n_epochs=args.num_epochs, out_bias=True, + n_classes=None) + elif not args.convolution: + print('-> running a fully-connected NN') + net = LinearNN(dim=args.in_dim, mid=args.mid, + n_layers=args.n_layers, dropout_rate=1., + n_epochs=args.num_epochs, mid_bias=True, + out_bias=True, n_classes=None) + else: + raise ValueError("Bad architecture specification") + net = net.to(device) + clf = nn.Linear(net.representation_dim, args.n_classes) + print(f'net dimensions={net.representation_dim}') + + ############################################################## + # Critic + ############################################################## + # projection head to reduce dimensionality for contrastive loss + proj_head = LinearCritic(latent_dim=net.representation_dim).to(device) + # classifier for better decision boundaries + # latent_clf = nn.Linear(proj_head.projection_dim, num_classes).to(device) + # NTXentLoss on its own requires labels (all unique) + critic = NTXentLoss(temperature=0.07, reducer=reducers.DoNothingReducer()) + sub_batch_size = 64 + + if device == 'cuda': + repr_dim = net.representation_dim + net = torch.nn.DataParallel(net) + net.representation_dim = repr_dim + cudnn.benchmark = True + + # if args.resume: + # # Load checkpoint. + # print('==> Resuming from checkpoint..') + # assert os.path.isdir('checkpoint'), \ + # 'Error: no checkpoint directory found!' 
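    # NOTE: manual state_dict restoration stays commented out in this script
    # because resuming is handled by passing ckpt_path=args.resume to
    # trainer.fit() further below.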
+ # resume_from = os.path.join('./checkpoint', args.resume) + # checkpoint = torch.load(resume_from) + # net.load_state_dict(checkpoint['net']) + # critic.load_state_dict(checkpoint['critic']) + + # make checkpoint directory + ckpt_path = './checkpoint/'+args.filename+'/' + if not os.path.isdir(ckpt_path): + os.mkdir(ckpt_path) + + # if args.resume: + # # the last version run + # last_ver = glob.glob(ckpt_path+'lightning_logs/version_*/')[-1] + # ckpt = ckpt_path + last_ver + glob.glob(last_ver+'checkpoints/*.ckpt')[-1] + # else: + # ckpt = None + + # save statistical data + joblib.dump(trainset.mean, ckpt_path+args.filename+'-train_means.joblib') + joblib.dump(trainset.std, ckpt_path+args.filename+'-train_stds.joblib') + + lightning_model = LitSimCLR(clf, net, proj_head, critic, args.batch_size, + sub_batch_size, args.lr, args.momentum, + args.cosine_anneal, args.num_epochs, + args.alpha, num_classes, args.test_freq, + testloader, args.convolution, (args.beta1, args.beta2), args.weight_decay) + tb_logger = pl.loggers.TensorBoardLogger(save_dir=ckpt_path) + trainer = pl.Trainer(max_epochs=args.num_epochs, + default_root_dir=ckpt_path, + check_val_every_n_epoch=args.test_freq, + profiler='simple', limit_train_batches=0.002, + logger=tb_logger, num_sanity_val_steps=0) + trainer.fit(model=lightning_model, train_dataloaders=trainloader, + val_dataloaders=valloader, ckpt_path=args.resume) + trainer.test(model=lightning_model, dataloaders=testloader) + + +if __name__ == "__main__": + main() diff --git a/RadClass/models/SSL/__init__.py b/RadClass/models/SSL/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/RadClass/models/SSML/CoTraining.py b/RadClass/models/SSML/CoTraining.py new file mode 100644 index 0000000..c23ffed --- /dev/null +++ b/RadClass/models/SSML/CoTraining.py @@ -0,0 +1,344 @@ +import numpy as np +import matplotlib.pyplot as plt +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# sklearn models +from sklearn import linear_model +# diagnostics +from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score +from scripts.utils import run_hyperopt +import joblib + + +class CoTraining: + ''' + Methods for deploying a basic co-training with logistic + regression implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + alpha: float; weight for encouraging high recall + beta: float; weight for encouraging high precision + NOTE: if alpha=beta=0, default to favoring balanced accuracy. + random_state: int/float for reproducible intiailization. 
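    A minimal usage sketch (illustrative only; trainx, trainy, Ux, testx,
    and testy stand in for user-supplied numpy arrays):
        cotrain = CoTraining()              # two default logistic regression models
        acc1_hist, acc2_hist = cotrain.train(trainx, trainy, Ux, testx, testy)
        pred1, acc, pred2, acc1, acc2 = cotrain.predict(testx, testy)
        cotrain.plot_cotraining(acc1_hist, acc2_hist)
        cotrain.save('cotraining-example')  # writes cotraining-example.joblib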
+ ''' + + # only binary so far + def __init__(self, params=None, alpha=0, beta=0, random_state=0): + # defaults to a fixed value for reproducibility + self.random_state = random_state + # dictionary of parameters for logistic regression model + self.params = params + self.alpha, self.beta = alpha, beta + if self.params is None: + self.model1 = linear_model.LogisticRegression( + random_state=self.random_state) + self.model2 = linear_model.LogisticRegression( + random_state=self.random_state) + # default needed for training + self.params = {'n_samples': 1} + else: + self.model1 = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=params['max_iter'], + tol=params['tol'], + C=params['C'] + ) + self.model2 = linear_model.LogisticRegression( + random_state=self.random_state, + max_iter=params['max_iter'], + tol=params['tol'], + C=params['C'] + ) + + def training_loop(self, slr1, slr2, L_lr1, L_lr2, + Ly_lr1, Ly_lr2, U_lr, n_samples, + testx=None, testy=None): + ''' + Main training iteration for co-training. + Given two models, labeled training data, and unlabeled training data: + - Train both models using their respective labeled datasets + - Randomly sample n_samples number of unlabeled + instances for model 1 and 2 each. + - Label the sampled unlabeled instances using + model 1 (u1) and model 2 (u2). + - Remove u1 and u2 from the unlabeled dataset and + include in each model's respective labeled dataset + with their associated labels for future training. + Inputs: + slr1: logistic regression co-training model #1 + slr2: logistic regression co-training model #2 + L_lr1: feature training data for co-training model #1 + L_lr2: feature training data for co-training model #2 + Ly_lr1: labels for input data for co-training model #1 + Ly_lr2: labels for input data for co-training model #2 + U_lr: unlabeled feature training data used by both models + n_samples: the number of instances to sample and + predict from Ux at one time + testx: feature vector/matrix used for testing the performance + of each model at every iteration. + testy: label vector used for testing the performance + of each model at every iteration. 
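        A worked example of the bookkeeping (illustrative numbers): with 100
        unlabeled instances and n_samples=10, each pass labels 10 instances
        per model (20 total), so the pool is exhausted after roughly 5
        iterations; once fewer than 2*n_samples instances remain, n_samples
        is reduced to half of the remaining pool before sampling.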
+ ''' + + model1_accs, model2_accs = np.array([]), np.array([]) + # should stay false but if true, + # the same unalbeled instance could be sampled multiple times + rep = False + while U_lr.shape[0] > 1: + slr1.fit(L_lr1, Ly_lr1) + slr2.fit(L_lr2, Ly_lr2) + + # pull u1 + # ensuring there is enough instances to sample for each model + if U_lr.shape[0] < n_samples*2: + n_samples = int(U_lr.shape[0]/2) + uidx1 = np.random.choice(range(U_lr.shape[0]), + n_samples, + replace=rep) + u1 = U_lr[uidx1].copy() + # remove instances that will be labeled + U_lr = np.delete(U_lr, uidx1, axis=0) + + # pull u2 + uidx2 = np.random.choice(range(U_lr.shape[0]), + n_samples, + replace=rep) + u2 = U_lr[uidx2].copy() + # remove instances that will be labeled + U_lr = np.delete(U_lr, uidx2, axis=0) + + # predict unlabeled samples + u1y = slr1.predict(u1) + u2y = slr2.predict(u2) + + if testx is not None and testy is not None: + # test and save model(s) accuracy over all training iterations + model1_accs = np.append(model1_accs, + balanced_accuracy_score(testy, + slr1.predict( + testx))) + model2_accs = np.append(model2_accs, + balanced_accuracy_score(testy, + slr2.predict( + testx))) + + # add predictions to cotrained model(s) labeled samples + L_lr1 = np.append(L_lr1, u2, axis=0) + L_lr2 = np.append(L_lr2, u1, axis=0) + Ly_lr1 = np.append(Ly_lr1, u2y, axis=0) + Ly_lr2 = np.append(Ly_lr2, u1y, axis=0) + + return slr1, slr2, model1_accs, model2_accs + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh co-training model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys n_samples, max_iter, tol, and C supported. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + ''' + + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + # unlabeled co-training data + Ux = data_dict['Ux'] + + clf = CoTraining(params=params, random_state=self.random_state) + # training and testing + model1_accs, model2_accs = clf.train(trainx, trainy, Ux, testx, testy) + # uses balanced_accuracy accounts for class imbalanced data + pred1, acc, pred2, model1_acc, model2_acc = clf.predict(testx, testy) + rec1, rec2 = recall_score(testy, pred1), recall_score(testy, pred2) + prec1, prec2 = precision_score(testy, pred1), precision_score(testy, pred2) + + # loss function minimizes misclassification + # by maximizing metrics + return {'score': acc+(self.alpha*max(rec1, rec2))+(self.beta*max(prec1, prec2)), + 'loss': (1-acc) + self.alpha*(1-max(rec1, rec2))+self.beta*(1-max(prec1, prec2)), + 'model': clf, + 'params': params, + 'model': clf.model1, + 'model2': clf.model2, + 'model1_acc_history': model1_accs, + 'model2_acc_history': model2_accs, + 'accuracy': acc, + 'precision': prec1, + 'recall': rec1, + 'precision2': prec2, + 'recall2': rec2,} + + def optimize(self, space, data_dict, max_evals=50, njobs=4, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. 
+ Inputs: + space: a raytune compliant dictionary with defined optimization + spaces. For example: + space = {'max_iter' : tune.qrandint(10, 10000, 10), + 'tol' : tune.loguniform(1e-5, 1e-3), + 'C' : tune.uniform(1.0, 1000.0), + 'n_samples' : tune.qrandint(1, 20, 1) + } + See hyperopt docs for more information. + data_dict: compact data representation with the five requisite + data structures used for training and testing an SSML model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: boolean. If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + njobs=njobs, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy, Ux, + testx=None, testy=None): + ''' + Wrapper method for a basic co-training with logistic regression + implementation training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + Ux: feature vector/matrix like labeled trainx but unlabeled data. + testx: feature vector/matrix used for testing the performance + of each model at every iteration. + testy: label vector used for testing the performance + of each model at every iteration. + ''' + + # avoid overwriting when deleting in co-training loop + U_lr = Ux.copy() + + # set the random seed of training splits for reproducibility + # This can be ignored by excluding params['seed'] + # in the hyperopt space dictionary + if 'seed' in self.params.keys(): + np.random.seed(self.params['seed']) + + # TODO: allow a user to specify uneven splits between the two models + split_frac = 0.5 + # labeled training data + idx = np.random.choice(range(trainy.shape[0]), + size=int(split_frac * trainy.shape[0]), + replace=False) + + # avoid overwriting when deleting in co-training loop + L_lr1 = trainx[idx].copy() + L_lr2 = trainx[~idx].copy() + Ly_lr1 = trainy[idx].copy() + Ly_lr2 = trainy[~idx].copy() + + self.model1, self.model2, model1_accs, model2_accs = \ + self.training_loop( + self.model1, self.model2, + L_lr1, L_lr2, + Ly_lr1, Ly_lr2, + U_lr, self.params['n_samples'], + testx, testy, + ) + + # optional returns if a user is interested in training diagnostics + return model1_accs, model2_accs + + def predict(self, testx, testy=None): + ''' + Wrapper method for sklearn's Label Propagation predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. 
+ ''' + + pred1 = self.model1.predict(testx) + pred2 = self.model2.predict(testx) + + acc = None + if testy is not None: + # balanced_accuracy accounts for class imbalanced data + # could alternatively use pure accuracy + # for a more traditional hyperopt + model1_acc = balanced_accuracy_score(testy, pred1) + model2_acc = balanced_accuracy_score(testy, pred2) + # select best accuracy for hyperparameter optimization + acc = max(model1_acc, model2_acc) + + return pred1, acc, pred2, model1_acc, model2_acc + + def plot_cotraining(self, model1_accs=None, model2_accs=None, + filename='lr-cotraining-learningcurves.png'): + ''' + Plots the training error curves for two co-training models. + NOTE: The user must provide the curves to plot, but each curve is + saved by the class under self.best and self.worst models. + Inputs: + filename: name to store picture under. + Must end in .png (or will be added if missing). + model1_accs: the accuracy scores over training epochs for model 1 + model2_accs: the accuracy scores over training epochs for model 2 + ''' + + fig, ax = plt.subplots(figsize=(10, 8), dpi=300) + ax.plot(np.arange(len(model1_accs)), model1_accs, + color='tab:blue', label='Model 1') + ax.plot(np.arange(len(model2_accs)), model2_accs, + color='tab:orange', label='Model 2') + ax.legend() + ax.set_xlabel('Co-Training Iteration') + ax.set_ylabel('Test Accuracy') + ax.grid() + + if filename[-4:] != '.png': + filename += '.png' + fig.savefig(filename) + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. + Added to filename if not included as input. + ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/RadClass/models/SSML/LabelProp.py b/RadClass/models/SSML/LabelProp.py new file mode 100644 index 0000000..47f1b18 --- /dev/null +++ b/RadClass/models/SSML/LabelProp.py @@ -0,0 +1,192 @@ +import numpy as np +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# sklearn models +from sklearn import semi_supervised +# diagnostics +from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score +from scripts.utils import run_hyperopt +import joblib + + +class LabelProp: + ''' + Methods for deploying sklearn's Label Propagation + implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + NOTE: Since LabelProp is guaranteed to converge given + enough iterations, there is no random_state defined. + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys gamma, n_neighbors, max_iter, and tol supported. + alpha: float; weight for encouraging high recall + beta: float; weight for encouraging high precision + NOTE: if alpha=beta=0, default to favoring balanced accuracy. + random_state: int/float for reproducible intiailization. 
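    A minimal usage sketch (illustrative only; trainx, trainy, Ux, testx,
    and testy stand in for user-supplied numpy arrays):
        lp = LabelProp()              # defaults: knn kernel, n_jobs=-1
        lp.train(trainx, trainy, Ux)  # Ux rows are assigned the placeholder label -1
        pred, bacc = lp.predict(testx, testy)
        lp.save('labelprop-example')  # writes labelprop-example.joblib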
+ ''' + + # only binary so far + def __init__(self, params=None, alpha=0, beta=0, random_state=0): + # defaults to a fixed value for reproducibility + self.random_state = random_state + # dictionary of parameters for Label Propagation model + self.alpha, self.beta = alpha, beta + self.params = params + if self.params is None: + # defaults: + # knn kernel, although an rbf is equally valid + # TODO: allow rbf kernels + # n_jobs, use parallelization if available. + self.model = semi_supervised.LabelPropagation( + kernel='knn', + n_jobs=-1 + ) + else: + self.model = semi_supervised.LabelPropagation( + kernel='knn', + gamma=params['gamma'], + n_neighbors=params['n_neighbors'], + max_iter=params['max_iter'], + tol=params['tol'], + n_jobs=-1 + ) + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh Label Propagation model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys max_iter, tol, and C supported. + data_dict: compact data representation with the five requisite + data structures used for training and testing an SSML model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + ''' + + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + Ux = data_dict['Ux'] + + clf = LabelProp(params, random_state=self.random_state) + # training and testing + clf.train(trainx, trainy, Ux) + # uses balanced_accuracy accounts for class imbalanced data + pred, acc = clf.predict(testx, testy) + rec = recall_score(testy, pred) + prec = precision_score(testy, pred) + + # loss function minimizes misclassification + # by maximizing metrics + return {'score': acc+(self.alpha*rec)+(self.beta*prec), + 'loss': (1-acc) + self.alpha*(1-rec)+self.beta*(1-prec), + 'model': clf, + 'params': params, + 'accuracy': acc, + 'precision': prec, + 'recall': rec} + + def optimize(self, space, data_dict, max_evals=50, njobs=4, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. + Inputs: + space: a raytune compliant dictionary with defined optimization + spaces. For example: + space = {'max_iter' : tune.qrandint(10, 10000, 10), + 'tol' : tune.loguniform(1e-6, 1e-4), + 'gamma' : tune.uniform(1, 50), + 'n_neighbors': tune.qrandint(1, 200, 1) + } + See hyperopt docs for more information. + data_dict: compact data representation with the five requisite + data structures used for training and testing an SSML model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: boolean. 
If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + njobs=njobs, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy, Ux): + ''' + Wrapper method for sklearn's Label Propagation training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + Ux: feature vector/matrix like labeled trainx but unlabeled data. + ''' + + # combine labeled and unlabeled instances for training + lp_trainx = np.append(trainx, Ux, axis=0) + lp_trainy = np.append(trainy, + np.full(shape=(Ux.shape[0],), fill_value=-1), + axis=0) + + # semi-supervised Label Propagation + self.model.fit(lp_trainx, lp_trainy) + + def predict(self, testx, testy=None): + ''' + Wrapper method for sklearn's Label Propagation predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. + ''' + + pred = self.model.predict(testx) + + acc = None + if testy is not None: + # uses balanced_accuracy_score to account for class imbalance + acc = balanced_accuracy_score(testy, pred) + + return pred, acc + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. + Added to filename if not included as input. + ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/RadClass/models/SSML/ShadowCNN.py b/RadClass/models/SSML/ShadowCNN.py new file mode 100644 index 0000000..0b31f10 --- /dev/null +++ b/RadClass/models/SSML/ShadowCNN.py @@ -0,0 +1,437 @@ +import numpy as np +import matplotlib.pyplot as plt +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# torch imports +import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +# shadow imports +import shadow.eaat +import shadow.losses +import shadow.utils +from shadow.utils import set_seed +# diagnostics +from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score +from scripts.utils import EarlyStopper, run_hyperopt +import joblib + + +class Net(nn.Module): + ''' + Neural Network constructor. + Also includes method for forward pass. + nn.Module: PyTorch object for neural networks. + Inputs: + layer1: int length for first layer. + layer2: int length for second layer. + Ideally a multiple of layer1. + layer3: int length for third layer. + Ideally a multiple of layer2. + kernel: convolutional kernel size. + NOTE: An optimal value is unclear for spectral data. + drop_rate: float (<1.) probability for reset/dropout layer. + length: single instance data length. + NOTE: Assumed to be 1000 for spectral data. + TODO: Allow hyperopt to optimize on arbitrary sized networks. + ''' + + def __init__(self, layer1=32, layer2=64, layer3=128, + kernel=3, drop_rate=0.1, length=1000): + ''' + Defines the structure for each type of layer. + The resulting network has fixed length but the + user can input arbitrary widths. 
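        A worked example of the dimension bookkeeping done below, using the
        defaults (length=1000, kernel=3, layer2=64): the two convolutions give
        conv_out = 1000 - 2*(3 - 1) = 996 positions; max_pool1d with
        kernel = stride = 2 leaves (996 - 1 - 1)//2 + 1 = 498 positions; and
        498 * 64 channels = 31872 flattened inputs to fc1.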
+ ''' + + # default max_pool1d kernel set by Shadow MNIST example + # NOTE: max_pool1d sets mp_kernel = mp_stride + self.mp_kernel = 2 + super(Net, self).__init__() + self.conv1 = nn.Conv1d(1, layer1, kernel, 1) + self.conv2 = nn.Conv1d(layer1, layer2, kernel, 1) + self.dropout = nn.Dropout(drop_rate) + # calculating the number of parameters/weights before the flattened + # fully-connected layer: + # first, there are two convolution layers, so the output length is + # the input length (feature_vector.shape[0] - 2_layers*(kernel-1)) + # if, in the future, more layers are desired, 2 must be adjusted + # next, calculate the output of the max_pool1d layer, which is + # round((conv_out - (kernel=stride - 1) - 1)/2 + 1) + # finally, multiply this by the number of channels in the last + # convolutional layer = layer2 + conv_out = length-2*(kernel-1) + parameters = layer2*( + ((conv_out - (self.mp_kernel - 1) - 1)//self.mp_kernel) + + 1) + self.fc1 = nn.Linear(int(parameters), layer3) + self.fc2 = nn.Linear(layer3, 2) + + def forward(self, x): + ''' + The resulting network has a fixed length with + two convolutional layers divided by relu activation, + a max pooling layer, a dropout layer, and two + fully-connected layers separated by a relu and + dropout layers. + ''' + + x = self.conv1(x) + x = F.relu(x) + x = self.conv2(x) + x = F.max_pool1d(x, self.mp_kernel) + x = self.dropout(x) + x = torch.flatten(x, 1) + x = self.fc1(x) + x = F.relu(x) + x = self.dropout(x) + x = self.fc2(x) + return x + + +class SpectralDataset(torch.utils.data.Dataset): + ''' + Dataset loader for use with PyTorch NN training. + torch.utils.data.Dataset: managing user input data for random sampling. + Inputs: + trainD: the nxm input vector/matrix of data. + labels: associated label vector for data. + ''' + + def __init__(self, trainD, labels): + self.labels = labels + self.trainD = trainD + + def __len__(self): + ''' + Define what length is for the Dataset + ''' + + return len(self.labels) + + def __getitem__(self, idx): + ''' + Define how to retrieve an instance from a dataset. + Inputs: + idx: the index to sample from. + ''' + + label = self.labels[idx] + data = self.trainD[idx] + # no need to bother with labels, unpacking both anyways + # sample = {"Spectrum": data, "Class": label} + # return sample + return data, label + + +class ShadowCNN: + ''' + Methods for deploying a Shadow CNN + implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys binning, hidden_layer, alpha, xi, eps, lr, and momentum + are supported. + TODO: Include functionality for manipulating other + CNN architecture parameters in hyperparameter optimization + alpha: float; weight for encouraging high recall + beta: float; weight for encouraging high precision + NOTE: if alpha=beta=0, default to favoring balanced accuracy. + random_state: int/float for reproducible intiailization. + length: int input length (i.e. dimensions of feature vectors) + TODO: Add input parameter, loss_function, for the other + loss function options available in Shadow (besides EAAT). 
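    A minimal usage sketch (illustrative only; the arrays stand in for
    user-supplied spectra with 1000-bin feature vectors, which the default
    architecture assumes):
        cnn = ShadowCNN()   # default Net encoder, EAAT loss, and SGD optimizer
        losscurve, evalcurve = cnn.train(trainx, trainy, Ux, testx, testy)
        pred, bacc = cnn.predict(testx, testy)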
+ ''' + + # only binary so far + def __init__(self, params=None, alpha=0, beta=0, random_state=0, length=1000): + # defaults to a fixed value for reproducibility + self.random_state = random_state + # set seeds for reproducibility + set_seed(0) + # device used for computation + self.device = torch.device("cuda" if + torch.cuda.is_available() else "cpu") + # dictionary of parameters for convolutional neural network model + self.alpha, self.beta = alpha, beta + self.params = params + if self.params is not None: + # assumes the input dimensions are measurements of 1000 bins + # TODO: Abstract this for arbitrary input size + self.model = Net(layer1=params['layer1'], + layer2=2*params['layer1'], + layer3=3*params['layer1'], + kernel=params['kernel'], + drop_rate=params['drop_rate'], + length=np.ceil(length/params['binning'])) + self.eaat = shadow.eaat.EAAT(model=self.model, + alpha=params['alpha'], + xi=params['xi'], + eps=params['eps']) + self.optimizer = optim.SGD(self.eaat.parameters(), + lr=params['lr'], + momentum=params['momentum']) + else: + # fixed value defaults needed by training algorithm + self.params = {'binning': 1, 'batch_size': 1} + # assumes the input dimensions are measurements of 1000 bins + # TODO: Abstract this for arbitrary input size + self.model = Net() + self.eaat = shadow.eaat.EAAT(model=self.model) + self.optimizer = optim.SGD(self.eaat.parameters(), + lr=0.1, momentum=0.9) + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh Shadow NN model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys binning, layer1, alpha, xi, eps, lr, momentum, + kernel, drop_rate, and batch_size are supported. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + ''' + + self.params = params + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + # unlabeled co-training data + Ux = data_dict['Ux'] + + clf = ShadowCNN(params=params, + random_state=self.random_state, + length=trainx.shape[1]) + # training and testing + losscurve, evalcurve = clf.train(trainx, trainy, Ux, testx, testy) + # not used; max acc in past few epochs used instead + y_pred, acc = clf.predict(testx, testy) + y_pred, acc = clf.predict(testx, testy) + max_acc = np.max(evalcurve[-10:]) + rec = recall_score(testy, y_pred) + prec = precision_score(testy, y_pred) + + # loss function minimizes misclassification + # by maximizing metrics + return {'score': max_acc+(self.alpha*rec)+(self.beta*prec), + 'loss': (1-max_acc) + self.alpha*(1-rec)+self.beta*(1-prec), + 'model': clf.eaat, + 'params': params, + 'accuracy': max_acc, + 'precision': prec, + 'recall': rec, + 'losscurve': losscurve, + 'evalcurve': evalcurve,} + + def optimize(self, space, data_dict, max_evals=50, njobs=4, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. + Inputs: + space: a raytune compliant dictionary with defined optimization + spaces. 
For example: + space = {'layer1' : tune.qrandint(1000, 10000, 10), + 'kernel' : tune.qrandint(1, 9, 1), + 'alpha' : tune.uniform(0.0001, 0.999), + 'xi' : tune.uniform(1e-2, 1e0), + 'eps' : tune.uniform(0.5, 1.5), + 'lr' : tune.uniform(1e-3, 1e-1), + 'momentum' : tune.uniform(0.5, 0.99), + 'binning' : tune.qrandint(1, 10, 1), + 'batch_size' : tune.qrandint(1, 100, 1) + } + See hyperopt docs for more information. + data_dict: compact data representation with the five requisite + data structures used for training and testing an SSML model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: boolean. If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + njobs=njobs, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy, Ux, testx=None, testy=None): + ''' + Wrapper method for Shadow NN training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + Ux: feature vector/matrix like labeled trainx but unlabeled data. + testx: feature vector/matrix used for testing the performance + of each model at every iteration. + testy: label vector used for testing the performance + of each model at every iteration. 
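The surrounding call might look like the following sketch. The search space mirrors the keys accepted by `fresh_start`, the data are small synthetic spectra standing in for real measurements, the import path is an assumption, and reading `clf.best['accuracy']` assumes `run_hyperopt` returns the trial dictionary produced by `fresh_start`.

```python
import numpy as np
from ray import tune

from RadClass.models.SSML.ShadowCNN import ShadowCNN  # assumed import path

rng = np.random.default_rng(0)
# Synthetic 1000-channel "spectra": labeled train/test sets plus unlabeled Ux.
data_dict = {
    'trainx': rng.poisson(5., size=(40, 1000)).astype(float),
    'trainy': rng.integers(0, 2, size=40),
    'testx':  rng.poisson(5., size=(20, 1000)).astype(float),
    'testy':  rng.integers(0, 2, size=20),
    'Ux':     rng.poisson(5., size=(60, 1000)).astype(float),
}

space = {'layer1':     tune.qrandint(16, 64, 16),
         'kernel':     tune.qrandint(1, 9, 1),
         'drop_rate':  tune.uniform(0.0, 0.5),
         'alpha':      tune.uniform(0.0001, 0.999),
         'xi':         tune.uniform(1e-2, 1e0),
         'eps':        tune.uniform(0.5, 1.5),
         'lr':         tune.uniform(1e-3, 1e-1),
         'momentum':   tune.uniform(0.5, 0.99),
         'binning':    tune.qrandint(1, 10, 1),
         'batch_size': tune.qrandint(8, 64, 8)}

clf = ShadowCNN(alpha=0, beta=0, random_state=0)
clf.optimize(space, data_dict, max_evals=10, njobs=1, verbose=True)

print(clf.params)             # best hyperparameters found
print(clf.best['accuracy'])   # balanced accuracy of the best trial
```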
+ ''' + + # avoid float round-off by using DoubleTensor + xtens = torch.FloatTensor(np.append(trainx, + Ux, + axis=0))[:, + ::self.params['binning']] + # xtens[xtens == 0.0] = torch.unique(xtens)[1]/1e10 + ytens = torch.LongTensor(np.append(trainy, + np.full(shape=(Ux.shape[0],), + fill_value=-1), + axis=0)) + + # define data set object + dataset = SpectralDataset(xtens, ytens) + + # create DataLoader object of DataSet object + DL_DS = torch.utils.data.DataLoader(dataset, + batch_size=self.params[ + 'batch_size' + ], + shuffle=True) + + # labels for unlabeled data are always "-1" + xEnt = torch.nn.CrossEntropyLoss(ignore_index=-1) + + # generate early-stopping watchdog + # TODO: allow a user of ShadowCNN to specify EarlyStopper's params + stopper = EarlyStopper(patience=3, min_delta=0) + n_epochs = 100 + self.eaat.to(self.device) + losscurve = [] + evalcurve = [] + for epoch in range(n_epochs): + self.eaat.train() + lossavg = [] + for i, (data, targets) in enumerate(DL_DS): + x = data.reshape((data.shape[0], + 1, + data.shape[1])).to(self.device) + y = targets.to(self.device) + self.optimizer.zero_grad() + out = self.eaat(x) + loss = xEnt(out, y) + self.eaat.get_technique_cost(x) + loss.backward() + self.optimizer.step() + lossavg.append(loss.item()) + losscurve.append(np.nanmedian(lossavg)) + if testx is not None and testy is not None: + pred, acc = self.predict(testx, testy) + evalcurve.append(acc) + + self.eaat.train() + # test for early stopping + x_val = torch.FloatTensor( + testx.copy()[:, ::self.params['binning']]) + x_val = x_val.reshape((x_val.shape[0], + 1, + x_val.shape[1])).to(self.device) + y_val = torch.LongTensor(testy).to(self.device) + out = self.eaat(x_val) + val_loss = xEnt(out, y_val) + \ + self.eaat.get_technique_cost(x_val) + if stopper.early_stop(val_loss): + break + + # optionally return the training accuracy if test data was provided + return losscurve, evalcurve + + def predict(self, testx, testy=None): + ''' + Wrapper method for Shadow NN predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. + binning: int number of bins sampled in feature vector + ''' + + self.eaat.eval() + y_pred, y_true = [], [] + for i, data in enumerate(torch.FloatTensor( + testx.copy()[:, ::self.params['binning']]) + ): + x = data.reshape((1, 1, data.shape[0])).to(self.device) + out = self.eaat(x) + y_pred.extend(torch.argmax(out, 1).detach().cpu().tolist()) + acc = None + if testy is not None: + acc = balanced_accuracy_score(np.array(testy), np.array(y_pred)) + + return y_pred, acc + + def plot_training(self, losscurve=None, evalcurve=None, + filename='lr-cotraining-learningcurves.png'): + ''' + Plots the training error curves for two co-training models. + NOTE: The user must provide the curves to plot, but each curve is + saved by the class under self.best and self.worst models. + Inputs: + filename: name to store picture under. + Must end in .png (or will be added if missing). 
+ losscurve: the loss value over training epochs + evalcurve: the accuracy scores over training epochs + ''' + + fig, (ax1, ax2) = plt.subplots(2, + 1, + sharex=True, + figsize=(10, 8), + dpi=300) + ax1.plot(losscurve) + ax2.plot(evalcurve) + ax1.set_xlabel('Epoch') + ax2.set_xlabel('Epoch') + ax1.set_ylabel('Loss Curve') + ax2.set_ylabel('Accuracy') + ax1.grid() + ax2.grid() + + if filename[-4:] != '.png': + filename += '.png' + fig.savefig(filename) + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. + Added to filename if not included as input. + ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/RadClass/models/SSML/ShadowNN.py b/RadClass/models/SSML/ShadowNN.py new file mode 100644 index 0000000..4128b32 --- /dev/null +++ b/RadClass/models/SSML/ShadowNN.py @@ -0,0 +1,287 @@ +import numpy as np +# For hyperopt (parameter optimization) +from hyperopt import STATUS_OK +# torch imports +import torch +# shadow imports +import shadow.eaat +import shadow.losses +import shadow.utils +from shadow.utils import set_seed +# diagnostics +from sklearn.metrics import balanced_accuracy_score, precision_score, recall_score +from scripts.utils import EarlyStopper, run_hyperopt +import joblib + + +class ShadowNN: + ''' + Methods for deploying a Shadow fully-connected NN + implementation with hyperparameter optimization. + Data agnostic (i.e. user supplied data inputs). + TODO: Currently only supports binary classification. + Add multinomial functions and unit tests. + Add functionality for regression(?) + Inputs: + params: dictionary of logistic regression input functions. + keys binning, hidden_layer, alpha, xi, eps, lr, and momentum + are supported. + alpha: float; weight for encouraging high recall + beta: float; weight for encouraging high precision + NOTE: if alpha=beta=0, default to favoring balanced accuracy. + random_state: int/float for reproducible intiailization. + TODO: Add input parameter, loss_function, for the other + loss function options available in Shadow (besides EAAT). 
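For reference, constructing this fully-connected variant with an explicit parameter dictionary might look like the sketch below; key names follow the docstring, values are placeholders, and the import path is an assumption based on the file location in this patch. Note that `input_length` is the raw spectrum length; as the constructor below shows, the network's first layer width is derived from it after channel binning.

```python
from RadClass.models.SSML.ShadowNN import ShadowNN  # assumed import path

params = {'hidden_layer': 1000,
          'alpha': 0.5, 'xi': 0.1, 'eps': 1.0,   # passed through to shadow.eaat.EAAT
          'lr': 0.01, 'momentum': 0.9,
          'binning': 2}                          # keep every 2nd of 1000 channels

# input_length is the spectrum length before binning.
clf = ShadowNN(params=params, alpha=0, beta=0, random_state=0, input_length=1000)
print(clf.eaat)  # EAAT-wrapped fully-connected torch model
```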
+ ''' + + # only binary so far + def __init__(self, params=None, alpha=0, beta=0, random_state=0, input_length=1000): + # defaults to a fixed value for reproducibility + self.random_state = random_state + self.input_length = input_length + # set seeds for reproducibility + set_seed(0) + # device used for computation + self.device = torch.device("cuda" if + torch.cuda.is_available() else "cpu") + # dictionary of parameters for neural network model + self.alpha, self.beta = alpha, beta + self.params = params + if self.params is not None: + # assumes the input dimensions are measurements of 1000 bins + # TODO: Abstract this for arbitrary input size + self.eaat = shadow.eaat.EAAT(model=self.model_factory( + int(np.ceil( + self.input_length / + params['binning'])), + params['hidden_layer']), + alpha=params['alpha'], + xi=params['xi'], + eps=params['eps']).to(self.device) + self.eaat_opt = torch.optim.SGD(self.eaat.parameters(), + lr=params['lr'], + momentum=params['momentum']) + # unlabeled instances always have a label of "-1" + self.xEnt = torch.nn.CrossEntropyLoss( + ignore_index=-1).to(self.device) + else: + self.params = {'binning': 1} + # assumes the input dimensions are measurements of 1000 bins + self.eaat = shadow.eaat.EAAT( + model=self.model_factory()).to(self.device) + self.eaat_opt = torch.optim.SGD(self.eaat.parameters(), + lr=0.1, momentum=0.9) + # unlabeled instances always have a label of "-1" + self.xEnt = torch.nn.CrossEntropyLoss( + ignore_index=-1).to(self.device) + + def model_factory(self, length=1000, hidden_layer=10000): + return torch.nn.Sequential( + torch.nn.Linear(length, hidden_layer), + torch.nn.ReLU(), + torch.nn.Linear(hidden_layer, length), + torch.nn.ReLU(), + torch.nn.Linear(length, 2) + ) + + def fresh_start(self, params, data_dict): + ''' + Required method for hyperopt optimization. + Trains and tests a fresh Shadow NN model + with given input parameters. + This method does not overwrite self.model (self.optimize() does). + Inputs: + params: dictionary of logistic regression input functions. + keys binning, hidden_layer, alpha, xi, eps, lr, and momentum + are supported. + data_dict: compact data representation with the four requisite + data structures used for training and testing a model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + ''' + + # unpack data + trainx = data_dict['trainx'] + trainy = data_dict['trainy'] + testx = data_dict['testx'] + testy = data_dict['testy'] + # unlabeled co-training data + Ux = data_dict['Ux'] + + clf = ShadowNN(params=params, + random_state=self.random_state, + input_length=testx.shape[1]) + # training and testing + acc_history = clf.train(trainx, trainy, Ux, testx, testy) + # not used; max acc in past few epochs used instead + eaat_pred, acc = clf.predict(testx, testy) + max_acc = np.max(acc_history[-10:]) + rec = recall_score(testy, eaat_pred) + prec = precision_score(testy, eaat_pred) + score = max_acc+(self.alpha*rec)+(self.beta*prec) + loss = (1-max_acc) + self.alpha*(1-rec)+self.beta*(1-prec) + + # loss function minimizes misclassification + # by maximizing metrics + return {'score': score, + 'loss': loss, + 'model': clf.eaat, + 'params': params, + 'accuracy': max_acc, + 'precision': prec, + 'recall': rec, + 'evalcurve': acc_history} + + def optimize(self, space, data_dict, max_evals=50, njobs=4, verbose=True): + ''' + Wrapper method for using hyperopt (see utils.run_hyperopt + for more details). 
After hyperparameter optimization, results + are stored, the best model -overwrites- self.model, and the + best params -overwrite- self.params. + Inputs: + space: a raytune compliant dictionary with defined optimization + spaces. For example: + space = {'hidden_layer' : tune.qrandint(1000, 10000, 10), + 'alpha' : tune.uniform(0.0001, 0.999), + 'xi' : tune.uniform(1e-2, 1e0), + 'eps' : tune.uniform(0.5, 1.5), + 'lr' : tune.uniform(1e-3, 1e-1), + 'momentum' : tune.uniform(0.5, 0.99), + 'binning' : tune.qrandint(1, 10, 1) + } + See hyperopt docs for more information. + data_dict: compact data representation with the five requisite + data structures used for training and testing an SSML model. + keys trainx, trainy, testx, testy, and Ux required. + NOTE: Uy is not needed since labels for unlabeled data + instances is not used. + max_evals: the number of epochs for hyperparameter optimization. + Each iteration is one set of hyperparameters trained + and tested on a fresh model. Convergence for simpler + models like logistic regression typically happens well + before 50 epochs, but can increase as more complex models, + more hyperparameters, and a larger hyperparameter space is tested. + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: boolean. If true, print results of hyperopt. + If false, print only the progress bar for optimization. + ''' + + best, worst = run_hyperopt(space=space, + model=self.fresh_start, + data_dict=data_dict, + max_evals=max_evals, + njobs=njobs, + verbose=verbose) + + # save the results of hyperparameter optimization + self.best = best + self.model = best['model'] + self.params = best['params'] + self.worst = worst + + def train(self, trainx, trainy, Ux, testx=None, testy=None): + ''' + Wrapper method for Shadow NN training method. + Inputs: + trainx: nxm feature vector/matrix for training model. + trainy: nxk class label vector/matrix for training model. + Ux: feature vector/matrix like labeled trainx but unlabeled data. + testx: feature vector/matrix used for testing the performance + of each model at every iteration. + testy: label vector used for testing the performance + of each model at every iteration. 
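The bookkeeping behind the labeled/unlabeled split is simple enough to show in isolation: unlabeled spectra are appended with a sentinel label of -1 (which the `CrossEntropyLoss(ignore_index=-1)` defined in the constructor skips), and channels are subsampled with `[:, ::binning]`. A plain-numpy sketch of that convention, with placeholder shapes:

```python
import numpy as np

binning = 2
trainx = np.random.poisson(5., size=(10, 1000)).astype(float)   # labeled spectra
trainy = np.random.randint(0, 2, size=10)                       # 0/1 class labels
Ux = np.random.poisson(5., size=(25, 1000)).astype(float)       # unlabeled spectra

# Unlabeled instances get the sentinel label -1, which the loss function ignores.
X = np.append(trainx, Ux, axis=0)
y = np.append(trainy, np.full(shape=(Ux.shape[0],), fill_value=-1), axis=0)

# Channel binning keeps every `binning`-th channel of each spectrum.
X_binned = X[:, ::binning]

print(X.shape, y.shape, X_binned.shape)   # (35, 1000) (35,) (35, 500)
print(np.unique(y))                       # typically [-1  0  1]
```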
+ ''' + + # avoid float round-off by using DoubleTensor + xtens = torch.FloatTensor(np.append(trainx, + Ux, + axis=0)[:, + ::self.params['binning']]) + # xtens[xtens == 0.0] = torch.unique(xtens)[1]/1e10 + ytens = torch.LongTensor(np.append(trainy, + np.full(shape=(Ux.shape[0],), + fill_value=-1), + axis=0)) + + n_epochs = 100 + xt = torch.Tensor(xtens).to(self.device) + yt = torch.LongTensor(ytens).to(self.device) + # generate early-stopping watchdog + # TODO: allow a user of ShadowCNN to specify EarlyStopper's params + stopper = EarlyStopper(patience=3, min_delta=0) + # saves history for max accuracy + acc_history = [] + for epoch in range(n_epochs): + # set the model into training mode + # NOTE: change this to .eval() mode for testing and back again + self.eaat.train() + # Forward/backward pass for training semi-supervised model + out = self.eaat(xt) + # supervised + unsupervised loss + loss = self.xEnt(out, yt) + self.eaat.get_technique_cost(xt) + self.eaat_opt.zero_grad() + loss.backward() + self.eaat_opt.step() + + if testx is not None and testy is not None: + x_val = torch.FloatTensor( + testx.copy() + )[:, ::self.params['binning']].to(self.device) + y_val = torch.LongTensor(testy.copy()).to(self.device) + + self.eaat.eval() + eaat_pred = torch.max(self.eaat(x_val), 1)[-1] + acc = balanced_accuracy_score(testy, eaat_pred.cpu().numpy()) + acc_history.append(acc) + + self.eaat.train() + # test for early stopping + out = self.eaat(x_val) + val_loss = self.xEnt(out, y_val) + \ + self.eaat.get_technique_cost(x_val) + if stopper.early_stop(val_loss): + break + + # optionally return the training accuracy if test data was provided + return acc_history + + def predict(self, testx, testy=None): + ''' + Wrapper method for Shadow NN predict method. + Inputs: + testx: nxm feature vector/matrix for testing model. + testy: nxk class label vector/matrix for training model. + optional: if included, the predicted classes -and- + the resulting classification accuracy will be returned. + ''' + + self.eaat.eval() + eaat_pred = torch.max(self.eaat( + torch.FloatTensor( + testx.copy()[:, ::self.params['binning']] + ).to(self.device) + ), 1)[-1] + # return tensor to cpu if on gpu and convert to numpy for return + eaat_pred = eaat_pred.cpu().numpy() + + acc = None + if testy is not None: + acc = balanced_accuracy_score(testy, eaat_pred) + + return eaat_pred, acc + + def save(self, filename): + ''' + Save class instance to file using joblib. + Inputs: + filename: string filename to save object to file under. + The file must be saved with extension .joblib. + Added to filename if not included as input. 
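A short round trip for the save/load pattern used throughout these wrappers. The extension is appended automatically when missing, and reloading only needs `joblib.load`, though the wrapper class must be importable in the environment doing the loading. The model below is untrained and the data are synthetic; both are placeholders to exercise the API, and the import path is an assumption.

```python
import joblib
import numpy as np
from RadClass.models.SSML.ShadowNN import ShadowNN  # assumed import path

params = {'hidden_layer': 64, 'alpha': 0.5, 'xi': 0.1, 'eps': 1.0,
          'lr': 0.01, 'momentum': 0.9, 'binning': 1}
clf = ShadowNN(params=params)          # untrained here; normally call train() first

clf.save('shadow_nn_model')            # written as shadow_nn_model.joblib
restored = joblib.load('shadow_nn_model.joblib')

# The restored object is the full wrapper instance, ready to use directly.
testx = np.random.poisson(5., size=(4, 1000)).astype(float)   # placeholder spectra
pred, _ = restored.predict(testx)
print(pred)
```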
+ ''' + + if filename[-7:] != '.joblib': + filename += '.joblib' + joblib.dump(self, filename) diff --git a/RadClass/models/SSML/__init__.py b/RadClass/models/SSML/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/RadClass/models/__init__.py b/RadClass/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/RadClass/scripts/__init__.py b/RadClass/scripts/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/RadClass/scripts/augs.py b/RadClass/scripts/augs.py new file mode 100644 index 0000000..44b8c8f --- /dev/null +++ b/RadClass/scripts/augs.py @@ -0,0 +1,781 @@ +import numpy as np +from scipy.optimize import curve_fit +from scipy.signal import find_peaks +from scipy.stats import loguniform +from beads.beads import beads + + +# DANS: Data Augmentations for Nuclear Spectra feature-Extraction +# TODO: standardize return to either include background or not +class DANSE: + def __init__(self): + self.BEADS_PARAMS = dict( + fc=4.749e-2, + r=4.1083, + df=2, + lam0=3.9907e-4, + lam1=4.5105e-3, + lam2=3.3433e-3, + ) + + def _estimate(self, X_bckg, mode): + ''' + Background estimation method used in background and sig2bckg. + NOTE: Two background subtraction modes are supported: 'min' and 'mean'. + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. + + Inputs: + X_bckg: array-like; 2D spectral array of measurements, uses the mode + input to complete background superimposition. + mode: str; two background subtraction modes are currently supported: + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. + ''' + + if mode == 'min': + idx = np.argmin(np.sum(X_bckg, axis=0)) + X_bckg = X_bckg[idx] + elif mode == 'mean': + X_bckg = np.mean(X_bckg, axis=1) + elif mode == 'beads': + X_bckg = beads(X_bckg, **self.BEADS_PARAMS)[1] + + return X_bckg + + def background(self, X, X_bckg, subtraction=False, + event_idx=None, mode='mean'): + ''' + Superimposes an even signature onto various forms of background + distributions. This action does require an accurate estimation of a + typical baseline, but several methods are employed. + X is assumed to be background adjusted by default. That is, it should + have its original background already removed. + If subtraction=True and event_idx is not None, X should be 2D. + event_idx indicates the row in X that indicates the event spectrum. + The other rows in X are then used to estimate a background distribution + for subtraction. + NOTE: Two background subtraction modes are supported: 'min' and 'mean'. + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. + + Inputs: + X: array-like; If 1D, taken as an event spectrum (must be previously + background-subtracted). If 2D, subtraction=True, and event_idx not + None, a background subtraction is conducted prior to + superimposition. + X_bckg: array-like; If 1D, add this spectrum to the event spectrum of X + as the superimposed background. If 2D, use the mode input to + complete background superimposition. + subtraction: bool; If True, conduct background subtraction on X + (event_idx must not be None) + event_idx: int(p); row index for event spectrum in X used for + background subtraction. + mode: str; two background subtraction modes are currently supported: + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. 
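A minimal sketch of the simplest superimposition path, where a single 1-D background spectrum is added directly to an already background-subtracted event spectrum. The import path is an assumption based on this patch, and importing the module requires the `beads` dependency imported at the top of the file.

```python
import numpy as np
from RadClass.scripts.augs import DANSE  # assumed import path; needs the beads package

rng = np.random.default_rng(0)
auger = DANSE()

# A background-subtracted "event" spectrum and one measured background spectrum.
event = rng.poisson(2., size=1000).astype(float)
bckg = rng.poisson(10., size=1000).astype(float)

# With a 1-D background no estimation is needed: the spectra are summed and
# clipped so that no channel goes negative.
augmented = auger.background(event, bckg, subtraction=False, mode='mean')
print(augmented.shape, (augmented >= 0).all())   # (1000,) True
```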
+ ''' + + X = X.copy() + modes = ['min', 'mean', 'beads'] + # input error checks + if subtraction and event_idx is None and X.ndim > 1: + raise ValueError('If subtraction=True and len(X)>1, \ + event_idx must be specified.') + elif subtraction and event_idx is not None and X.ndim <= 1: + raise ValueError('X must be 2D to do background subtraction.') + elif X_bckg.ndim > 1 and mode == 'beads': + raise ValueError('mode == {} does not support \ + multiple backgrounds'.format(mode)) + elif mode not in modes: + raise ValueError('Input mode not supported.') + + # subtract a background estimation if it wasn't done prior + if subtraction: + if event_idx is not None: + bckg = np.delete(X, event_idx, axis=0) + X = X[event_idx] + else: + # estimate the baseline from the event itself + bckg = X.copy() + bckg = self._estimate(bckg, mode) + # ensure no negative counts + X = (X-bckg).clip(min=0) + + # estimate a background/baseline if multiple spectra are provided + # if mode == 'beads': + # warnings.warn('mode == {} assumes X_bckg has already \ + # undergone BEADS estimation.'.format(mode)) + if X_bckg.ndim > 1 and mode != 'beads': + X_bckg = self._estimate(X_bckg, mode) + + # ensure no negative counts + return (X + X_bckg).clip(min=0) + + def resample(self, X): + ''' + Resamples spectra according to a Poisson distribution. + Gamma radiation detection is approximately Poissonian. + Each energy bin of a spectrum could be resampled using the original + count-rate, lambda_i, as the statistical parameter for a distribution: + Pois_i(lambda_i). Randomly sampling from this distribution would + provide a new count-rate for that energy bin that is influenced, or + augmented, by the original sample. + + Inputs: + X: array-like; can be a vector of one spectrum, a matrix of many + matrices (rows: spectra, cols: instances), or a subset of either. + X serves as the statistical parameters for each distribution. + Return: + augmentation: array-like, same shape as X; the augmented spectra using + channel resampling (see above) + ''' + + # lambda = n*p using constant probability + p = 0.5 + n = X / p + # augmentation = np.random.poisson(lam=X) + # using binomial distribution for accurate low-count sampling + augmentation = np.random.binomial(n=n.astype(int), p=p) + + return augmentation + + def sig2bckg(self, X, X_bckg, r=(0.5, 2.), subtraction=False, + event_idx=None, mode='mean'): + ''' + Estimate and subtract background and scale signal-to-noise of event + signature. The return is a spectrum with an estimated background and + a perturbed signal intensity. + Scaling ratio is 1/r^2. Therefore, r<1 makes the signal more intense + and r>1 makes the signal smaller. + X is assumed to be background adjusted by default. That is, it should + have its original background already removed. + If subtraction=True and event_idx is not None, X should be 2D. + event_idx indicates the row in X that indicates the event spectrum. + The other rows in X are then used to estimate a background distribution + for subtraction. + NOTE: Two background subtraction modes are supported: 'min' and 'mean'. + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. + + Inputs: + X: array-like; If 1D, taken as an event spectrum (must be previously + background-subtracted). If 2D, subtraction=True, and event_idx not + None, a background subtraction is conducted prior to + superimposition. 
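For the channel resampling defined in `resample()` above, a quick numerical check makes the statistics concrete: with p = 0.5 and n = X / p, each resampled bin is an integer draw whose expected value equals the original count rate. A small sketch under the same import assumptions as the previous example:

```python
import numpy as np
from RadClass.scripts.augs import DANSE  # assumed import path; needs the beads package

auger = DANSE()
X = np.full(1000, 40.)              # flat spectrum with 40 counts in every channel

resampled = auger.resample(X)
print(resampled.dtype)              # integer counts
print(X.mean(), resampled.mean())   # means agree closely: E[Binomial(n=80, p=0.5)] = 40
```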
+ X_bckg: array-like; If 1D, add this spectrum to the event spectrum of X + as the superimposed background. If 2D, use the mode input to + complete background superimposition. + r: tuple; [min, max) scaling ratio. Default values ensure random + scaling that is no more than 2x larger or smaller than the original + signal. See numpy.random.uniform for information on interval. + NOTE: Enforce a specific value with (r1, r2) where r1=r2. + subtraction: bool; If True, conduct background subtraction on X + (event_idx must not be None) + event_idx: int(p); row index for event spectrum in X used for + background subtraction. + mode: str; two background subtraction modes are currently supported: + 'min': take the minimum gross count-rate spectrum from X. + 'mean': take the average count-rate for each bin from X. + ''' + + X = X.copy() + modes = ['min', 'mean', 'beads'] + # input error checks + if subtraction and event_idx is None and X.ndim > 1: + raise ValueError('If subtraction=True and len(X)>1, \ + event_idx must be specified.') + elif subtraction and event_idx is not None and X.ndim <= 1: + raise ValueError('X must be 2D to do background subtraction.') + elif X_bckg.ndim > 1 and mode == 'beads': + raise ValueError('mode == {} does not support \ + multiple backgrounds'.format(mode)) + elif mode not in modes: + raise ValueError('Input mode not supported.') + + if r[0] <= 0 or r[1] <= 0: + raise ValueError('{} must be positive.'.format(r)) + + # subtract a background estimation if it wasn't done prior + if subtraction: + if event_idx is not None: + bckg = np.delete(X, event_idx, axis=0) + X = X[event_idx] + else: + # estimate the baseline from the event itself + bckg = X.copy() + bckg = self._estimate(bckg, mode) + # ensure no negative counts + X = (X-bckg).clip(min=0) + + # estimate a background/baseline if multiple spectra are provided + # if mode == 'beads': + # warnings.warn('mode == {} assumes X_bckg has already \ + # undergone BEADS estimation.'.format(mode)) + if X_bckg.ndim > 1 and mode != 'beads': + X_bckg = self._estimate(X_bckg, mode) + + # even random choice between upscaling and downscaling + r = loguniform.rvs(r[0], r[1], size=1) + X *= r + + # ensure no negative counts + return (X + X_bckg).clip(min=0) + + def _gauss(self, x, amp, mu, sigma): + ''' + Fit equation for a Gaussian distribution. + + Inputs: + x: array-like; 1D spectrum array of count-rates + amp: float; amplitude = A/sigma*sqrt(2*pi) + mu: float; mean + sigma: float; standard deviation + ''' + + return amp * np.exp(-((x - mu) / 4 / sigma)**2) + + def _emg(self, x, amp, mu, sigma, tau): + """ + Exponentially Modifed Gaussian (for small tau). See: + https://en.wikipedia.org/wiki/Exponentially_modified_Gaussian_distribution + + Inputs: + x: array-like; 1D spectrum array of count-rates + amp: float; amplitude = A/sigma*sqrt(2*pi) + mu: float; mean + sigma: float; standard deviation + tau: float; exponent relaxation time + """ + + term1 = np.exp(-0.5 * np.power((x - mu) / sigma, 2)) + term2 = 1 + (((x - mu) * tau) / sigma**2) + return amp * term1 / term2 + + def _lingauss(self, x, amp, mu, sigma, m, b): + ''' + Includes a linear term to the above function. Used for modeling + (assumption) linear background on either shoulder of a gamma photopeak. 
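The fitting helpers later in this file reduce to fitting this Gaussian-plus-line model with `scipy.optimize.curve_fit`. A self-contained sketch of that fit on a synthetic peak, using the same assumed import path as above; the "truth" parameters exist only to generate the test data.

```python
import numpy as np
from scipy.optimize import curve_fit
from RadClass.scripts.augs import DANSE  # assumed import path; needs the beads package

auger = DANSE()
ch = np.arange(200)

# Synthetic peak: amplitude 100 at channel 90, sigma 5, on a gentle linear baseline.
truth = (100., 90., 5., 0.02, 3.)
y = auger._lingauss(ch, *truth) + np.random.default_rng(0).normal(0., 1., ch.size)

# Same model and the same style of rough initial guess used internally by _fit().
p0 = [y.max(), float(np.argmax(y)), 1., 0., y[0]]
coeff, _ = curve_fit(auger._lingauss, ch, y, p0=p0)
print(np.round(coeff, 2))   # amp, mu, |sigma|, m, b close to the truth values
```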
+ + Inputs: + x: array-like; 1D spectrum array of count-rates + amp: float; amplitude = A/sigma*sqrt(2*pi) + mu: float; mean + sigma: float; standard deviation + m: float; linear slope for background/baseline + b: float; y-intercept for background/baseline + ''' + + return amp * np.exp(-0.5 * np.power((x - mu) / sigma, 2.)) + m*x + b + + def _fit(self, roi, X): + ''' + Fit function used by resolution() for fitting a Gaussian function + on top of a linear background in a specified region of interest. + TODO: Add a threshold for fit 'goodness.' Return -1 if failed. + + Inputs: + roi: tuple; (min, max) bin/index values for region of interest - used + to index from data, X + X: array-like; 1D spectrum array of count-rates + ''' + + # binning of data (default usually 0->1000 bins) + ch = np.arange(0, len(X)) + region = X[roi[0]:roi[1]] + + # initial guess for fit + max_y = np.max(region) + max_z = ch[roi[0]:roi[1]][np.argmax(region)] + # [amp, mu, sigma, m, b] + p0 = [max_y, max_z, 1., 0, X[roi[0]]] + + # prevents nonsensical fit parameters (fail otherwise) + lower_bound = [0, 0, 0, -np.inf, -np.inf] + upper_bound = [np.inf, X.shape[0]-1, np.inf, np.inf, np.inf] + bounds = (lower_bound, upper_bound) + coeff, var_matrix = curve_fit(self._lingauss, + ch[roi[0]:roi[1]], + region, + p0=p0, + bounds=bounds) + + return coeff + + # # as calculated exactly from Gaussian statistics + # fwhm = 2*np.sqrt(2*np.log(2))*coeff[1] + # return fwhm + + def _crude_bckg(self, roi, X): + ''' + Linear estimation of background using the bounds of an ROI. + Uses point-slope formula and the bounds for the ROI region to create + an array of the expected background. + + Inputs: + roi: tuple; (min, max) bin/index values for region of interest - used + to index from data, X + X: array-like; 1D spectrum array of count-rates + ''' + + lower_bound = roi[0] + upper_bound = roi[1] + + y1 = X[lower_bound] + y2 = X[upper_bound] + slope = (y2 - y1) / (upper_bound - lower_bound) + + y = slope * (np.arange(lower_bound, upper_bound) - lower_bound) + y1 + + return y, slope, y1 + + def _escape_int(self, E): + ''' + Computes the ratio of escape peak/photopeak intensity as + a function of photopeak energy (> 1.022 MeV). + This is roughly estimated from two papers: + - 10.1016/0029-554X(73)90186-9 + - 10.13182/NT11-A12285 + Three values are eye-estimated and polynomially fitted using + Wolfram Alpha. This is a crude computation with poorly vetted + papers working with HPGe (rather than the typical NaI) detectors. + NOTE: This breaks down for E>~4 MeV, the ratio will grow > 1. + For E<~1.3MeV, the polynomial starts to increase again, but at + a very low intensity for such low energy gammas. + TODO: find better sources or a better method for intensity estimation. + + Inputs: + E: float; energy of photopeak + ''' + + return (8.63095e-8*E**2) - (0.000209524*E) + 0.136518 + + def nuclear(self, roi, X, escape, binE=3., + width=None, counts=None, subtract=False): + ''' + Inject different nuclear interactions into the spectrum. + Current functionality allows for the introduction of either escape + peaks or entirely new photopeaks (ignoring Compton continuum). + Width and counts relationship for escape and photo-peaks is assumed + to be linear across the spectrum. However, the user can specify + width and counts as an input. 
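A usage sketch for the photopeak-injection path described above. The spectrum is synthetic, built with one strong reference peak so the internal `find_peaks`/fit step has something to calibrate width and intensity against, and an explicit `width`/`counts` pair overrides the automatic estimates. The ROI is kept below 1022 keV so no escape peaks are added. Import path as assumed earlier.

```python
import numpy as np
from RadClass.scripts.augs import DANSE  # assumed import path; needs the beads package

rng = np.random.default_rng(0)
auger = DANSE()

# Synthetic spectrum: Poisson baseline plus one strong photopeak near channel 400.
X = rng.poisson(10., size=1000).astype(float)
peak, _ = np.histogram(rng.normal(loc=400., scale=5., size=2000),
                       bins=1000, range=(0, 1000))
X += peak

# Inject a new photopeak centered on the ROI (~200 ch * 3 keV/ch = 600 keV),
# with an explicit width and intensity instead of the auto-estimated ones.
augmented = auger.nuclear(roi=[180, 220], X=X, escape=False, binE=3.,
                          width=5., counts=500)
print(augmented[180:220].sum() - X[180:220].sum())   # roughly +500 counts added
```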
+ + Inputs: + roi: list; (min, max) bin/index values for region of interest - used + to index from data, X + X: array-like; 1D spectrum array of count-rates + escape: bool; False if adding photopeak, True if adding escape peaks. + if True, roi must include the peak > 1,022 keV to introduce peaks. + binE: float; Energy/Channel ratio for spectrum. Necessary for computing + escape peak relationships. + width: float; width, in channels, of peak to introduce. Technically + defined as the standard deviation for the distribution + (see numpy.random.normal documentation). + counts: int; number of counts to introduce in peak (i.e. intensity). + ''' + + # assumes the center of the normal distribution is the ROI center + b = np.mean(roi) + E = b*binE + # escape peak error to ensure physics + if escape and E < 1022: + raise ValueError('Photopeaks below 1,022 keV ', + 'do not produce escape peaks.') + # avoid overwriting original data + nX = X.copy() + bins = nX.shape[0] + + # find (photo)peaks with heights above baseline of at least 10 counts + # ignoring low-energy distribution typically residing in first 100 bins + peaks, properties = find_peaks(X[100:], + prominence=20, + width=4, + rel_height=0.5) + # find the tallest peak to estimate energy resolution + # remember to shift the peak found by the 100-bin mask + fit_peak = peaks[np.argsort(properties['prominences'])[-1]]+100 + # fit ROI to estimate representative peak counts + w = int(len(nX)*0.1) + fit_roi = [max(fit_peak-w, 0), min(fit_peak+w, bins-1)] + # fit the most prominent peak + # [amp, mu, sigma, m, b] + coeff = self._fit(fit_roi, nX) + amp, sigma = coeff[0], coeff[2] + # assume linear relationship in peak counts and width over spectrum + # width is approximately a delta fnct. at the beginning of the spectrum + # counts are approximately zero by the end of the spectrum + slope_sigma = sigma/fit_peak + # slope_counts should be negative because fit_peak < bins + slope_counts = np.sqrt(2*np.pi) * amp / (fit_peak - bins) + # avoid bad fits from adding an exponential amount of counts + max_counts = min(-slope_counts * bins, + np.sqrt(np.sum(nX))) + + # insert peak at input energy + if not escape: + # approximate width and counts from relationship estimated above + sigma_peak = slope_sigma * b + # avoid bad fits from adding an exponential amount of counts + cts_peak = min(np.absolute(max_counts - (slope_counts * b)), + np.sqrt(np.sum(nX))) + # overwrite if user input is given + if width is not None: + sigma_peak = width + if counts is not None: + cts_peak = counts + # create another spectrum with only the peak + new_peak, _ = np.histogram(np.round( + np.random.normal(loc=b, + scale=sigma_peak, + size=int(cts_peak))), + bins=bins, + range=(0, bins)) + if subtract: + nX = nX-new_peak + else: + nX = nX+new_peak + # insert escape peaks if specified or physically realistic + if escape or (E >= 1022 and not subtract): + # fit the peak at input energy + # [amp, mu, sigma, m, b] + coeff = self._fit(roi, nX) + # background counts integral + bckg_width = roi[1] - roi[0] + background = (coeff[3]/2)*(roi[1]**2 + - roi[0]**2) + coeff[4] * (bckg_width) + # find difference from background + peak_counts = np.sum(nX[roi[0]:roi[1]]) - background + + # normal distribution parameters for escape peaks + b_single = int((E-511)/binE) + sigma_single = slope_sigma * b_single + b_double = int((E-1022)/binE) + sigma_double = slope_sigma * b_double + # escape peak intensity estimated as a function of E + cts = self._escape_int(E)*peak_counts + # overwrite if user input is 
given + if width is not None: + sigma_single = sigma_double = width + if counts is not None: + cts = counts + + # create a blank spectrum with only the escape peak + single, _ = np.histogram(np.round( + np.random.normal(loc=b_single, + scale=sigma_single, + size=int(cts))), + bins=bins, + range=(0, bins)) + double, _ = np.histogram(np.round( + np.random.normal(loc=b_double, + scale=sigma_double, + size=int(cts))), + bins=bins, + range=(0, bins)) + if subtract: + nX = nX-single-double + else: + nX = nX+single+double + return nX + + def find_res(self, X, width=4, roi_perc=0.03): + ''' + Automatically find reasonable peaks in a spectrum and return one. + This can be used to randomly find a peak to perturb via resolution. + Uses BEADS to identify peaks in a spectrum. + Note that both BEADS and scipy.signals.find_peaks can be very unstable + and thus this is not always reliable. It is recommended to check for + reasonable peaks or fits/augmentations after using this method. + + Inputs: + X: array-like; 1D spectrum array of count-rates + width: int; minimum channel width for an identified peak + (see scipy.signals.find_peaks for more information) + roi_perc: float; percent of total channels in X to have on each + shoulder of an ROI. + ''' + + beads_results = beads(X, **self.BEADS_PARAMS) + # np.clip(min=0) ensures no negative counts when finding peaks + peaks, _ = find_peaks(beads_results[0].clip(min=0), + width=width, + rel_height=0.5) + choice = np.random.choice(peaks, 1) + w = int(len(X)*roi_perc) + roi = [int(max(choice-w, 0)), int(min(choice+w, len(X)-1))] + return roi + + def resolution(self, roi, X, multiplier=1.5, conserve=True): + ''' + Manipulate the resolution, or width, of a photopeak as measured by + the full-width at half-maximum (FWHM). + In terms of reasonable values for multiplier, be cautious for + values >> 1. Wider peaks will overwrite a wider area of the spectrum. + Note that sometimes the interplay between a tighter or wider ROI + (which determines the region to fit) and the size of the multiplier + can affect the shape of the resulting peak. + + Inputs: + roi: list; (min, max) bin/index values for region of interest - used + to index from data, X + X: array-like; 1D spectrum array of count-rates + multiplier: float; scaler to manipulate FWHM by. Greater than 1 + widens the peak and vice versa. + conserve: bool; if True, peak counts will be conserved after + augmentation, meaning a taller peak for multipler<1 & vice versa + ''' + + if multiplier <= 0: + raise ValueError('{} must be positive.'.format(multiplier)) + + # avoid overwriting original data + X = X.copy() + if multiplier < 0: + multiplier = 1/abs(multiplier) + + # [amp, mu, sigma, m, b] + coeff = self._fit(roi, X) + # amp = coeff[0] + fwhm = 2*np.sqrt(2*np.log(2))*coeff[2] + new_sigma = multiplier * fwhm / (2*np.sqrt(2*np.log(2))) + coeff[2] = new_sigma + + # there's no need to refind background/baseline + # because it was fit in coeff above + # but this could be used to isolate background + # y, m, b = self._crude_bckg(roi, X) + + # expanding ROI if new peak is too wide + # 6-sigma ensures the entire Gaussian distribution is captured + # NOTE: this is unstable, new peaks (and the background/baseline) + # can overwrite other spectral features, should it be removed? 
+ if 2*new_sigma >= roi[1]-roi[0]: + # maximum expansion cannot be more than length of spectrum + roi[0] = max(0, roi[0]-int(new_sigma)) + roi[1] = min(X.shape[0]-1, roi[1]+int(new_sigma)) + + ch = np.arange(roi[0], roi[1]) + peak = self._lingauss(ch, + amp=coeff[0], + mu=coeff[1], + sigma=new_sigma, + m=coeff[3], + b=coeff[4]) + if conserve: + # only counts from background + background = (coeff[3]*ch + coeff[4]).clip(min=0) + # only counts from old peak + old_cts = (X[ch] - background).clip(min=0) + # only counts from new peak + new_cts = (peak - background).clip(min=0) + # scale new peak to conserve original counts + if np.sum(new_cts) > 0: + peak = (new_cts*(np.sum(old_cts)/np.sum(new_cts))) + background + + # normalize to conserve relative count-rate + # NOTE: this is realistic physically, but is it necesary? + # peak = peak * (np.sum(X[roi[0]:roi[1]]) / np.sum(peak)) + + # add noise to the otherwise smooth transformation + # .clip() necessary so counts are not negative + # .astype(float) avoids ValueError: lam value too large + peak = self.resample(peak.clip(min=0).astype(np.float64)) + X[roi[0]:roi[1]] = peak + return X + + def mask(self, X, mode='random', interval=5, block=(0, 100)): + ''' + Mask specific regions of a spectrum to force feature importance. + This may or may not be physically realistic, depending on the masking + scenario (e.g. pileup) but it represents a common image augmentation. + NOTE: the default values for interval and block are not used, but + recommended sizes or degrees for reasonable augmentations. + + Inputs: + X: array-like; should be 1D, i.e. one spectrum to be augmented + mode: str; three modes are supported: + 'interval': mask every interval's channel + 'block': mask everything within a block range + 'both': mask every interval's channel within a block range + 'random': randomly pick one of the above + interval: int; mask every [this int] channel in the spectrum + block: tuple; spectral range to mask (assumed spectral length is + 1000 channels) + ''' + + # avoid overwriting original data + X = X.copy() + + modes = ['random', 'interval', 'block', 'both'] + if mode not in modes: + raise ValueError('Input mode not supported.') + if mode == 'random': + mode = np.random.choice(modes) + if mode == 'interval': + # high => exclusive: 10+1 + interval = np.random.randint(1, 11) + elif mode == 'block': + # default spectral length is 1,000 channels + # TODO: abstract spectral length + low = np.random.randint(0, 999) + # default block width is low+10 to max length + # TODO: abstract block width + high = np.random.randint(low+10, 1000) + block = (low, high) + + # mask spectrum (i.e. set values to 0) + if mode == 'interval': + X[::interval] = 0 + elif mode == 'block': + X[block[0]:block[1]] = 0 + elif mode == 'both': + X[block[0]:block[1]:interval] = 0 + + return X + + def _ResampleLinear1D(self, original, targetLen): + ''' + Originally from StackOverflow: + https://stackoverflow.com/questions/20322079/downsample-a-1d-numpy-array + Upsamples or downsamples an array by interpolating + the value in each bin to a given length. 
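The interpolation above should agree with evaluating `np.interp` on an evenly spaced grid over the original indices, which gives a quick way to check the behavior without constructing the class. A tiny numpy-only sketch:

```python
import numpy as np

original = np.array([0., 1., 2., 3.])
target_len = 7

# Evaluate the original array at 7 evenly spaced positions over its index range.
index_arr = np.linspace(0, len(original) - 1, num=target_len)
resampled = np.interp(index_arr, np.arange(len(original)), original)

print(resampled)   # [0.  0.5 1.  1.5 2.  2.5 3. ]
```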
+ + Inputs: + original: array-like; spectrum or array to be resampled + targetLen: int; target length to resize/resample array + ''' + + original = np.array(original, dtype=float) + index_arr = np.linspace(0, len(original)-1, num=targetLen, dtype=float) + # find the floor (round-down) for each bin (cutting off with int) + index_floor = np.array(index_arr, dtype=int) + # find the ceiling (max/round-up) for each bin + index_ceil = index_floor + 1 + # compute the difference/remainder + index_rem = index_arr - index_floor + + val1 = original[index_floor] + val2 = original[index_ceil % len(original)] + # interpolate the new value for each new bin + interp = val1 * (1.0-index_rem) + val2 * index_rem + assert (len(interp) == targetLen) + return interp + + def _Poisson1D(self, X, lam): + ''' + Apply positive gain shift by randomly distributing counts in each bin + according to a Poisson distribution with parameter lam. + The random Poisson distribution results in a spectrum that can have a + slightly different distribution of counts rather than the uniform + deformation of _ResampleLinear1D. + The drift is energy dependent (i.e. more drift for higher energies). + This mode only supports positive gain shift. + + Inputs: + X: array-like; 1D spectrum, with count-rate for each channel + lam: float; Poisson parameter for gain drift. Determines the severity + of gain drift in spectrum. + ''' + + new_ct = X.copy() + for i, c in enumerate(X): + # randomly sample a new assigned index for every count in bin + # using np.unique, summarize which index each count goes to + idx, nc = np.unique(np.round( + np.random.poisson(lam=lam*(i/X.shape[0]), + size=int(c))), + return_counts=True) + # check to see if any indices are greater than the spectral length + missing_idx = np.count_nonzero(i+idx >= new_ct.shape[0]) + if missing_idx > 0: + # add blank bins if so + new_ct = np.append(new_ct, + np.repeat(0, + np.max(idx)+i-new_ct.shape[0]+1)) + # distribute all counts according to their poisson index + new_ct[(i+idx).astype(int)] += nc + # adjust for double-counting + new_ct[i] -= np.sum(nc) + + return new_ct + + def gain_shift(self, X, bins=None, lam=np.random.uniform(-5, 5), + k=0, mode='resample'): + ''' + Modulate the gain-shift underlying a spectrum. + This simulates a change in the voltage to channel mapping, which + will affect how the spectral shape appears in channel vs. energy space. + If a positive gain shift occurs (multiplier increases), e.g. 1V=1ch + becomes 0.9V=1ch, spectral features will stretch out and widen across + the spectrum. Vice versa for a negative gain shift. + Qualitatively, a positive drift manifests in a smeared or stretched + spectrum with wider peaks whereas a negative drift manifests in a + squeezed or tightened spectrum with narrower peaks. + Both a positive and negative gain drift are supported, however only + mode='resample' supports negative drift. + + Inputs: + X: array-like; 1D spectrum, with count-rate for each channel + bins: array-like; 1D vector (with length len(counts)+1) of either + bin edges in energy space or channel numbers. + lam: float; Poisson parameter for gain drift. Determines the severity + of gain drift in spectrum. + k: int; number of bins to shift the entire spectrum by + mode: str; two possible gain shift algorithms can be used + 'resample': linearly resample the spectrum according to a new + length (lam), evenly redistributing the counts. + 'poisson': statistically/randomly resample the counts in each bin + according to a poisson distribution of parameter lam. 
+ NOTE: 'poisson' only works in the positive direction. + TODO: Future feature implementation should probably focus + just on the rebinning algorithm, since it is simpler + and can work in both directions. + ''' + + modes = ['resample', 'poisson'] + if mode not in modes: + raise ValueError('{} is not a supported algorithm.'.format(mode)) + if len(X.shape) > 1: + raise ValueError(f'gain_shift expects only 1 spectrum (i.e. 1D \ + vector) but {X.shape[0]} were passed') + + # gain-shift algorithm + # add blank bins before or after the spectrum + if k < 0: + X = np.append(X, np.repeat(0., np.absolute(k))) + X[0] = np.sum(X[:np.absolute(k)]) + X = np.delete(X, np.arange(1, np.absolute(k))) + # fix the length of the spectrum to be the same as before + if bins is not None: + bins = np.linspace(bins[0], bins[-1], X.shape[0]+1) + elif k > 0: + X = np.insert(X, 0, np.repeat(0., k)) + # fix the length of the spectrum to be the same as before + if bins is not None: + width = bins[1] - bins[0] + bins = np.arange(bins[0], bins[-1]+(k*width), width) + + # only a direct bin shift is desired + if lam == 0.: + return X, bins + # gain-drift algorithm(s) + elif mode == 'resample' or (mode == 'poisson' and lam < 0): + # second condition needed since 'poisson' does not support + # negative gain drift (lam < 0) + new_ct = self._ResampleLinear1D(X, int(X.shape[0]+lam)) + elif mode == 'poisson': + # recalculate binning if passed + new_ct = self._Poisson1D(X, abs(lam)) + + # enforce the same count-rate + new_ct *= np.sum(X)/np.sum(new_ct) + + # compute bins if passed + if bins is not None: + width = bins[1] - bins[0] + new_b = np.arange(bins[0], + bins[0]+((len(new_ct)+1)*width), + width) + return new_ct, new_b + else: + return new_ct, bins diff --git a/RadClass/scripts/configs.py b/RadClass/scripts/configs.py new file mode 100644 index 0000000..1b4047a --- /dev/null +++ b/RadClass/scripts/configs.py @@ -0,0 +1,287 @@ +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr + +MIT License + +Copyright (c) 2023 Jordan Stomps + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+''' + +# import torchvision +# import torchvision.transforms as transforms + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') +# sys.path.append(os.getcwd()+'/data/') +# from augmentation import ColourDistortion +from .dataset import MINOSBiaugment, DataOrganizer, DataBiaugment +from .specTools import read_h_file +# from models import * +from .transforms import Background, Resample, Sig2Bckg, Nuclear, \ + Resolution, Mask, GainShift +from sklearn.model_selection import train_test_split +import numpy as np +import pandas as pd + + +def add_indices(dataset_cls): + class NewClass(dataset_cls): + def __getitem__(self, item): + output = super(NewClass, self).__getitem__(item) + return (*output, item) + + return NewClass + + +def get_datasets(dataset, dset_fpath, bckg_fpath, valsfpath=None, + testfpath=None, normalization=False, accounting=False, + augs=None, add_indices_to_data=False): + # , augment_clf_train=False, num_positive=None): + + ssml_dset = None + transform_dict = { + 'Background': Background(bckg_dir=bckg_fpath, mode='beads'), + 'Resample': Resample(), + 'Sig2Bckg': Sig2Bckg(bckg_dir=bckg_fpath, mode='beads', r=(0.5, 1.5)), + 'Nuclear': Nuclear(binE=3), + 'Resolution': Resolution(multiplier=(0.5, 1.5)), + 'Mask': Mask(), + 'GainShift': GainShift() + } + transform_train = [] + if augs is not None: + for key in augs: + transform_train.append(transform_dict[key]) + else: + transform_train = [ + Background(bckg_dir=bckg_fpath, mode='beads'), + Resample(), + Sig2Bckg(bckg_dir=bckg_fpath, mode='beads', r=(0.5, 1.5)), + Nuclear(binE=3), + Resolution(multiplier=(0.5, 1.5)), + Mask(), + GainShift() + ] + print('list of transformations:') + for t in transform_train: + print(f'\t{t}') + + if dataset in ['minos', 'minos-ssml']: + data = pd.read_hdf(dset_fpath, key='data') + # print(f'\tclasses: {np.unique(targets, return_counts=True)}') + # print(f'\t\tshape: {targets.shape}') + ytr = np.full(data.shape[0], -1) + Xtr = data.to_numpy()[:, np.arange(1000)].astype(float) + print(f'\tNOTE: double check data indexing: {data.shape}') + val = pd.read_hdf(valsfpath, key='data') + Xval = val.to_numpy()[:, 1+np.arange(1000)].astype(float) + yval = val['label'].values + # yval[yval == 1] = 0 + yval[yval != 1] = 0 + test = read_h_file(testfpath, 60, 60) + Xtest = test.to_numpy()[:, np.arange(1000)].astype(float) + targets = test['event'].values + # all test values are positives + # ytest = np.full_like(ytest, 0, dtype=np.int32) + ytest = np.ones_like(targets, dtype=np.int32) + # metal transfers + ytest[targets == 'ac225'] = 0 + ytest[targets == 'activated-metals'] = 0 + ytest[targets == 'spent-fuel'] = 0 + print(f'\ttraining instances = {Xtr.shape[0]}') + print(f'\tvalidation instances = {Xval.shape[0]}') + print(f'\ttest instances = {Xtest.shape[0]}') + + if add_indices_to_data: + tr_dset = add_indices(MINOSBiaugment(Xtr, ytr, + transforms=transform_train, + normalization=normalization, + accounting=accounting)) + val_dset = add_indices(DataOrganizer(Xval, yval, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + if dataset == 'minos-ssml': + ssml_dset = add_indices(DataBiaugment(Xval.copy(), yval.copy(), + transform_train, + tr_dset.mean, + tr_dset.std, + accounting=accounting)) + test_dset = add_indices(DataOrganizer(Xtest, ytest, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + else: + tr_dset = MINOSBiaugment(Xtr, ytr, transforms=transform_train, + normalization=normalization, + accounting=accounting) + val_dset = DataOrganizer(Xval, yval, tr_dset.mean, tr_dset.std, 
+ accounting=accounting) + if dataset == 'minos-ssml': + ssml_dset = DataBiaugment(Xval, yval, transform_train, + tr_dset.mean, tr_dset.std, + accounting=accounting) + test_dset = DataOrganizer(Xtest, ytest, tr_dset.mean, + tr_dset.std, accounting=accounting) + elif dataset in ['minos-curated', 'minos-transfer-ssml']: + data = pd.read_hdf(dset_fpath, key='data') + # print(f'\tclasses: {np.unique(targets, return_counts=True)}') + # print(f'\t\tshape: {targets.shape}') + ytr = np.full(data.shape[0], -1) + Xtr = data.to_numpy()[:, np.arange(1000)].astype(float) + print(f'\tNOTE: double check data indexing: {data.shape}') + + test_data = read_h_file(testfpath, 60, 60) + X = test_data.to_numpy()[:, np.arange(1000)].astype(float) + y = test_data['event'].values + Xval, Xtest, \ + val_targets, test_targets = train_test_split(X, y, + train_size=0.2, + stratify=y) + # all test values are positives + # ytest = np.full_like(ytest, 0, dtype=np.int32) + yval = np.ones_like(val_targets, dtype=np.int32) + ytest = np.ones_like(test_targets, dtype=np.int32) + # metal transfers + yval[val_targets == 'ac225'] = 0 + yval[val_targets == 'activated-metals'] = 0 + yval[val_targets == 'spent-fuel'] = 0 + ytest[test_targets == 'ac225'] = 0 + ytest[test_targets == 'activated-metals'] = 0 + ytest[test_targets == 'spent-fuel'] = 0 + + print(f'\ttraining instances = {Xtr.shape[0]}') + print(f'\tvalidation instances = {Xval.shape[0]}') + print(f'\ttest instances = {Xtest.shape[0]}') + + if add_indices_to_data: + tr_dset = add_indices(MINOSBiaugment(Xtr, ytr, + transforms=transform_train, + normalization=normalization, + accounting=accounting)) + val_dset = add_indices(DataOrganizer(Xval, yval, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + if dataset == 'minos-transfer-ssml': + ssml_dset = add_indices(DataBiaugment(Xval.copy(), yval.copy(), + transform_train, + tr_dset.mean, + tr_dset.std, + accounting=accounting)) + test_dset = add_indices(DataOrganizer(Xtest, ytest, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + else: + tr_dset = MINOSBiaugment(Xtr, ytr, transforms=transform_train, + normalization=normalization, + accounting=accounting) + val_dset = DataOrganizer(Xval, yval, tr_dset.mean, tr_dset.std, + accounting=accounting) + if dataset == 'minos-transfer-ssml': + ssml_dset = DataBiaugment(Xval, yval, transform_train, + tr_dset.mean, tr_dset.std, + accounting=accounting) + test_dset = DataOrganizer(Xtest, ytest, tr_dset.mean, tr_dset.std, + accounting=accounting) + elif dataset == 'minos-2019': + # Including unlabeled spectral data for contrastive learning + data = pd.read_hdf(dset_fpath, key='data') + # print(f'\tclasses: {np.unique(targets, return_counts=True)}') + # print(f'\t\tshape: {targets.shape}') + ytr = np.full(data.shape[0], -1) + Xtr = data.to_numpy()[:, np.arange(1000)].astype(float) + print(f'\tNOTE: double check data indexing: {data.shape}') + + X = pd.read_hdf(valsfpath, key='data') + # events = np.unique(X['label'].values) + y = X['label'].values + y[y == 1] = 0 + y[y != 0] = 1 + X = X.to_numpy()[:, 1+np.arange(1000)].astype(float) + run = True + while run: + Xval, Xtest, yval, ytest = train_test_split(X, y, test_size=213) + if np.unique(ytest, return_counts=True)[1][0] == 125: + run = False + print(f'\ttraining instances = {Xtr.shape[0]}') + print(f'\tvalidation instances = {Xval.shape[0]}') + print(f'\ttest instances = {Xtest.shape[0]}') + + if add_indices_to_data: + tr_dset = add_indices(MINOSBiaugment(Xtr, ytr, + transforms=transform_train, + normalization=normalization, + 
accounting=accounting)) + val_dset = add_indices(DataOrganizer(Xval, yval, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + test_dset = add_indices(DataOrganizer(Xtest, ytest, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + else: + tr_dset = MINOSBiaugment(Xtr, ytr, transforms=transform_train, + normalization=normalization, + accounting=accounting) + val_dset = DataOrganizer(Xval, yval, tr_dset.mean, tr_dset.std, + accounting=accounting) + test_dset = DataOrganizer(Xtest, ytest, tr_dset.mean, tr_dset.std, + accounting=accounting) + elif dataset == 'minos-2019-binary': + # Using only the data that was used for the preliminary experiment + data = pd.read_hdf(dset_fpath, key='data') + targets = data['label'].values + targets[targets == 1] = 0 + targets[targets != 0] = 1 + print(f'\tclasses: {np.unique(targets, return_counts=True)}') + print(f'\t\tshape: {targets.shape}') + data = data.to_numpy()[:, 1+np.arange(1000)].astype(float) + print(f'\tNOTE: double check data indexing: {data.shape}') + Xtr, X, ytr, y = train_test_split(data, targets, test_size=0.3) + Xval, Xtest, yval, ytest = train_test_split(X, y, train_size=0.33) + print(f'\ttraining instances = {Xtr.shape[0]}') + print(f'\tvalidation instances = {Xval.shape[0]}') + print(f'\ttest instances = {Xtest.shape[0]}') + + if add_indices_to_data: + tr_dset = add_indices(MINOSBiaugment(np.append(Xtr, Xval, axis=0), + np.append(ytr, yval, axis=0), + transforms=transform_train, + normalization=normalization, + accounting=accounting)) + val_dset = add_indices(DataOrganizer(Xval, yval, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + test_dset = add_indices(DataOrganizer(Xtest, ytest, tr_dset.mean, + tr_dset.std, + accounting=accounting)) + else: + tr_dset = MINOSBiaugment(Xtr, ytr, transforms=transform_train, + normalization=normalization, + accounting=accounting) + val_dset = DataOrganizer(Xval, yval, tr_dset.mean, tr_dset.std, + accounting=accounting) + test_dset = DataOrganizer(Xtest, ytest, tr_dset.mean, tr_dset.std, + accounting=accounting) + else: + raise ValueError("Bad dataset value: {}".format(dataset)) + + return tr_dset, val_dset, test_dset, ssml_dset diff --git a/RadClass/scripts/dataset.py b/RadClass/scripts/dataset.py new file mode 100644 index 0000000..c8fe936 --- /dev/null +++ b/RadClass/scripts/dataset.py @@ -0,0 +1,149 @@ +import numpy as np +import torch +import logging +from torch.utils.data import Dataset +from RadClass.scripts.augs import DANSE + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') + + +def remove_bckg(X): + auger = DANSE() + if X.ndim > 1: + newX = torch.zeros_like(X) + for i in range(X.shape[0]): + newX[i] = X[i] - auger._estimate(X[i], mode='beads') + return newX + else: + return X - auger._estimate(X, mode='beads') + + +class DataOrganizer(Dataset): + def __init__(self, X, y, mean, std, accounting=False): + self.data = torch.FloatTensor(X.copy()) + self.targets = torch.LongTensor(y.copy()) + # whether or not to remove background in output spectra + self.accounting = accounting + + self.mean = mean + self.std = std + + def __len__(self): + return self.data.size(0) + + def __getitem__(self, idx): + x = self.data[idx] + y = self.targets[idx] + + if self.accounting: + x = remove_bckg(x) + # normalize all data + x = x - self.mean + x = torch.where(self.std == 0, x, x/self.std) + + return x, y + + +class MINOSBiaugment(Dataset): + def __init__(self, X, y, transforms, + normalization=False, accounting=False): + # self.data = pd.read_hdf(data_fpath, key='data') + # 
self.targets = torch.from_numpy(self.data['event'].values) + # self.data = torch.from_numpy(self.data[np.arange(1000)].values) + self.data = torch.FloatTensor(X.copy()) + self.targets = torch.LongTensor(y.copy()) + self.transforms = transforms + # whether or not to remove background in output spectra + self.accounting = accounting + + # remove background for normalization + if self.accounting: + print('***************************\ + conducting accounting') + tmp = remove_bckg(self.data) + else: + tmp = self.data + self.mean = torch.mean(tmp, axis=0) + self.std = torch.std(tmp, axis=0) + if normalization: + print('***************************\ + conducting min-max normalization') + self.mean = torch.min(tmp, axis=0)[0] + self.std = torch.max(tmp, axis=0)[0] - self.mean + + def __len__(self): + return self.data.size(0) + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is index of the target class. + """ + spec, target = self.data[index], self.targets[index] + + # if self.transforms is not None: + aug1, aug2 = np.random.choice(self.transforms, size=2, replace=False) + logging.debug(f'{index}: aug1={aug1} and aug2={aug2}') + spec1 = torch.FloatTensor(aug1(spec)) + spec2 = torch.FloatTensor(aug2(spec)) + + # remove background + if self.accounting: + spec1 = remove_bckg(spec1) + spec2 = remove_bckg(spec2) + # normalize all data + spec1 = spec1 - self.mean + spec1 = torch.where(self.std == 0., spec1, spec1/self.std) + spec2 = spec2 - self.mean + spec2 = torch.where(self.std == 0., spec2, spec2/self.std) + + return (spec1, spec2), target, index + + +class DataBiaugment(Dataset): + def __init__(self, X, y, transforms, mean, std, accounting=False): + # self.data = pd.read_hdf(data_fpath, key='data') + # self.targets = torch.from_numpy(self.data['event'].values) + # self.data = torch.from_numpy(self.data[np.arange(1000)].values) + self.data = torch.FloatTensor(X.copy()) + self.targets = torch.LongTensor(y.copy()) + self.transforms = transforms + # whether or not to remove background in output spectra + self.accounting = accounting + + self.mean = mean + self.std = std + + def __len__(self): + return self.data.size(0) + + def __getitem__(self, index): + """ + Args: + index (int): Index + Returns: + tuple: (image, target) where target is index of the target class. + """ + spec, target = self.data[index], self.targets[index] + + # if self.transforms is not None: + aug1, aug2 = np.random.choice(self.transforms, size=2, replace=False) + logging.debug(f'{index}: aug1={aug1} and aug2={aug2}') + spec1 = torch.FloatTensor(aug1(spec)) + spec2 = torch.FloatTensor(aug2(spec)) + + # remove background + if self.accounting: + spec1 = remove_bckg(spec1) + spec2 = remove_bckg(spec2) + # normalize all data + spec1 = spec1 - self.mean + spec1 = torch.where(self.std == 0., spec1, spec1/self.std) + spec2 = spec2 - self.mean + spec2 = torch.where(self.std == 0., spec2, spec2/self.std) + + return (spec1, spec2), target, index diff --git a/RadClass/scripts/evaluate.py b/RadClass/scripts/evaluate.py new file mode 100644 index 0000000..7bce0eb --- /dev/null +++ b/RadClass/scripts/evaluate.py @@ -0,0 +1,132 @@ +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. 
+More information found here: https://github.com/ae-foster/pytorch-simclr +''' + +import os +import torch +import torch.nn as nn +import torch.optim as optim +from tqdm import tqdm +from torchmetrics import ConfusionMatrix + + +def save_checkpoint(net, clf, critic, epoch, args, script_name): + # Save checkpoint. + print('Saving..') + state = { + 'net': net.state_dict(), + 'clf': clf.state_dict(), + 'critic': critic.state_dict(), + 'epoch': epoch, + 'args': vars(args), + 'script': script_name + } + if not os.path.isdir('checkpoint'): + os.mkdir('checkpoint') + destination = os.path.join('./checkpoint', args.filename+'.pth') + torch.save(state, destination) + + +def encode_train_set(clftrainloader, device, net): + net.eval() + + store = [] + with torch.no_grad(): + t = tqdm(enumerate(clftrainloader), + desc='Encoded: **/** ', + total=len(clftrainloader), + bar_format='{desc}{bar}{r_bar}') + for batch_idx, (inputs, targets) in t: + inputs, targets = inputs.to(device), targets.to(device) + representation = net(inputs) + store.append((representation, targets)) + + t.set_description('Encoded %d/%d' % + (batch_idx, len(clftrainloader))) + + X, y = zip(*store) + X, y = torch.cat(X, dim=0), torch.cat(y, dim=0) + return X, y + + +def train_clf(X, y, representation_dim, num_classes, device, reg_weight=1e-3): + print('\nL2 Regularization weight: %g' % reg_weight) + print(f'\tX: min={X.min()} and max={X.max()}') + + criterion = nn.CrossEntropyLoss() + n_lbfgs_steps = 500 + + # Should be reset after each epoch for a completely independent evaluation + clf = nn.Linear(representation_dim, num_classes).to(device) + clf_optimizer = optim.LBFGS(clf.parameters(), lr=1e-2) + clf.train() + + t = tqdm(range(n_lbfgs_steps), + desc='Loss: **** | Train Acc: ****% ', + bar_format='{desc}{bar}{r_bar}') + for _ in t: + def closure(): + clf_optimizer.zero_grad() + raw_scores = clf(X) + loss = criterion(raw_scores, y) + loss += reg_weight * clf.weight.pow(2).sum() + loss.backward() + + _, predicted = raw_scores.max(1) + correct = predicted.eq(y).sum().item() + # print(f'X={X[0]}\nraw_scores={raw_scores[0]}') + # print(f'y={y}') + # print(f'\tcorrect ({correct}) from predicted: {predicted}') + + t.set_description('Loss: %.3f | Train Acc: %.3f%% ' % + (loss, 100. * correct / y.shape[0])) + + return loss + + clf_optimizer.step(closure) + + return clf + + +def test(testloader, device, net, clf, n_classes=2): + criterion = nn.CrossEntropyLoss() + net.eval() + clf.eval() + test_clf_loss = 0 + correct = 0 + total = 0 + if n_classes > 2: + confmat = ConfusionMatrix(task='multiclass', num_classes=n_classes) + cmat = torch.zeros(n_classes, n_classes) + else: + confmat = ConfusionMatrix(task='binary', num_classes=n_classes) + cmat = torch.zeros(n_classes, n_classes) + with torch.no_grad(): + t = tqdm(enumerate(testloader), + total=len(testloader), + desc='Loss: **** | Test Acc: ****% ', + bar_format='{desc}{bar}{r_bar}') + for batch_idx, (inputs, targets) in t: + inputs, targets = inputs.to(device), targets.to(device) + representation = net(inputs) + # test_repr_loss = criterion(representation, targets) + raw_scores = clf(representation) + clf_loss = criterion(raw_scores, targets) + + test_clf_loss += clf_loss.item() + _, predicted = raw_scores.max(1) + total += targets.size(0) + correct += predicted.eq(targets).sum().item() + cmat += confmat(predicted, targets) + + t.set_description('Loss: %.3f | Test Acc: %.3f%% ' % + (test_clf_loss / (batch_idx + 1), + 100. * correct / total)) + + acc = 100. 
* correct / total + bacc = 0.5 * ((cmat[0][0] / (cmat[0][0] + cmat[0][1])) + + (cmat[1][1] / (cmat[1][1] + cmat[1][0]))) + return acc, bacc, cmat, test_clf_loss diff --git a/RadClass/scripts/scheduler.py b/RadClass/scripts/scheduler.py new file mode 100644 index 0000000..3fa6197 --- /dev/null +++ b/RadClass/scripts/scheduler.py @@ -0,0 +1,32 @@ +''' +Author: Jordan Stomps + +Largely adapted from a PyTorch conversion of SimCLR by Adam Foster. +More information found here: https://github.com/ae-foster/pytorch-simclr +''' + +import math + +from torch.optim.lr_scheduler import _LRScheduler + + +class CosineAnnealingWithLinearRampLR(_LRScheduler): + + def __init__(self, optimizer, T_max, eta_min=0, + last_epoch=-1, ramp_len=10): + self.T_max = T_max + self.eta_min = eta_min + self.ramp_len = ramp_len + super(CosineAnnealingWithLinearRampLR, self).__init__(optimizer, + last_epoch) + + def get_lr(self): + return self._get_closed_form_lr() + + def _get_closed_form_lr(self): + cosine_lr = [self.eta_min + (base_lr - self.eta_min) * + (1 + math.cos(math.pi * self.last_epoch / self.T_max)) / 2 + for base_lr in self.base_lrs] + linear_lr = [base_lr * (1 + self.last_epoch) / + self.ramp_len for base_lr in self.base_lrs] + return [min(x, y) for x, y in zip(cosine_lr, linear_lr)] diff --git a/RadClass/scripts/specTools.py b/RadClass/scripts/specTools.py new file mode 100644 index 0000000..18e2705 --- /dev/null +++ b/RadClass/scripts/specTools.py @@ -0,0 +1,159 @@ +import numpy as np +import pandas as pd +import h5py as h +from typing import List, Optional, Type + + +def integrate_spectral_matrix( + S: np.ndarray, + integration_time: int, + stride: int +) -> List[np.ndarray]: + """ + :param S: matrix of 1-sec spectra + :param integration_time: desired integration, length of each spectral block + :param stride: shift between spectral blocks + :return: list of integrated spectra, each as a np.ndarray (1,n) vector for n channels + """ + # set limits for loop + last_row = S.shape[0] + current_row = 0 + spectra = [] + while (current_row + integration_time) <= last_row: + spectra.append( + np.atleast_2d(np.sum(S[current_row:current_row+integration_time, :], axis=0)).reshape(1, -1) + ) + current_row += stride + return spectra + +""" +def remove_event_counter(df): + # removes the trailing counter from the event label + def relabel_row(r): + return '_'.join(r['event'].split('_')[:-1]) + + df['event'] = df.apply(relabel_row, axis=1) + + return df +""" + +def separate_event_counter(df): + """make event instance/counter a separate column for tracking/parsing""" + def _helper(r): + split_event = r['event'].split('_') + r['event'] = '-'.join(split_event[:-1]) + r['instance'] = split_event[-1] + return r + + df = df.apply(_helper, axis=1) + return df + + +def resample_spectra( + df: pd.DataFrame, + n: int, + n_channels=1000 +) -> pd.DataFrame: + """ + :param df: dataframe containing m spectra as rows and labels + :param n: number of resamples for each spectrum + :return: list of m * (n + 1) spectra + """ + def _resample(spec): + """performs single resample""" + return np.array([np.random.poisson(lam=channel) for channel in spec]) + + # combine labels to make repeating easier + unsplit_columns = df.columns + print("Before combine_label():\n") + print(df.columns) + df = combine_label(df) + print("\n\nAfter combine_label()\n") + print(df.columns) + + spectra = np.array(df.iloc[:, :n_channels]) + # note we assume our label is in one columns + labels = np.array(df.iloc[:, n_channels]) + + # note np.repeat() repeats each 
element rather than repeating the whole array + new_spectra = [_resample(spectrum) for spectrum in spectra for _ in range(n) ] + new_labels = np.concatenate([labels.reshape(-1, 1), np.repeat(labels, n).reshape(-1, 1)], axis=0) + combined_data = np.concatenate( + [np.concatenate([spectra, new_spectra], axis=0), new_labels], axis=1 + ) + + # undo label combine to allow separate tracking of event, event counter, and detector/station + # I might be able to skip the next line + df_ = pd.DataFrame(data=combined_data, columns=df.columns) + df_ = split_labels(df_) + #print("After split_labels()\n") + #print(df.columns) + #print("Size of combined data...") + + return df_ + + +def combine_label(df): + """combines event and detector to make resampling easier""" + def _combine_helper(r): + return '_'.join([r['event'], r['detector'], r['instance']]) + + df['label'] = df.apply(_combine_helper, axis=1) + df = df.drop(['event', 'detector', 'instance'], axis=1) + return df + + +def split_labels(df): + """opposite of combine labels to do after resampling""" + def _split_helper(r): + r['event'] = r['label'].split('_')[0] + r['detector'] = r['label'].split('_')[1] + r['instance'] = r['label'].split('_')[2] + return r + + df = df.apply(_split_helper, axis=1) + df = df.drop('label', axis=1) + + return df + + +def read_h_file( + file: str, + integration_time: int, + stride: int, + resample: bool=False, + n: int=None +) -> pd.DataFrame: + """ + extract time-integrated spectra for multiple events and detectors from hdf5 file + :param file: hdf5 file as string + :param integration_time: desired integration for spectral processing + :param stride: stride for moving-window time integration + :param resample: choose to resample spectra to generate additional + :return: flattened pd.dataFrame of spectra and associated information/labels + """ + df_list = [] + + cols = [f'channel {i}' for i in range(1, 1001)] # number for channels ugly hardcoded + + f = h.File(file, 'r') + events = list(f.keys()) + for event in events: + print(f'Processing {event} events') + current_event = f[event] + nodes = list(current_event.keys()) + for node in nodes: + spectral_matrix = np.array(current_event[node]['spectra']) + spectra_list = integrate_spectral_matrix(spectral_matrix, integration_time, stride) + for s in spectra_list: + df_ = pd.DataFrame(data=s, columns=cols) + df_['event'] = event + df_['detector'] = node + df_list.append(df_) + #return [np.array(spectra_list[0]), event, node] + + df = pd.concat(df_list) + df = separate_event_counter(df) + + return df + diff --git a/RadClass/scripts/transforms.py b/RadClass/scripts/transforms.py new file mode 100644 index 0000000..d294a62 --- /dev/null +++ b/RadClass/scripts/transforms.py @@ -0,0 +1,187 @@ +from .augs import DANSE +import numpy as np +import pandas as pd +from scipy.stats import loguniform +import torch + +# import sys +# import os +# sys.path.append(os.getcwd()+'/scripts/') + + +class Background(torch.nn.Module): + def __init__(self, bckg_dir, mode='beads'): + super().__init__() + # _log_api_usage_once(self) + + self.mode = mode + self.bckg = pd.read_hdf(bckg_dir, key='data') + self.bckg_dir = bckg_dir + + def forward(self, X): + X = X.detach().numpy() + bckg_idx = np.random.choice(self.bckg.shape[0]) + ibckg = self.bckg.iloc[bckg_idx][ + np.arange(1000)].to_numpy().astype(float) + auger = DANSE() + return auger.background(X, + ibckg, + subtraction=True, + event_idx=None, + mode='beads') + + def __repr__(self) -> str: + return f"{self.__class__.__name__}\ + 
(bckg_dir={self.bckg_dir}, mode={self.mode})" + + +class Resample(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, X): + X = X.detach().numpy() + auger = DANSE() + return auger.resample(np.absolute(X)) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}" + + +class Sig2Bckg(torch.nn.Module): + def __init__(self, bckg_dir, mode='beads', r=(0.5, 2.)): + super().__init__() + # _log_api_usage_once(self) + + self.mode = mode + self.bckg = pd.read_hdf(bckg_dir, key='data') + self.bckg_dir = bckg_dir + self.r = r + + def forward(self, X): + X = X.detach().numpy() + bckg_idx = np.random.choice(self.bckg.shape[0]) + ibckg = self.bckg.iloc[bckg_idx][ + np.arange(1000)].to_numpy().astype(float) + auger = DANSE() + return auger.sig2bckg(X, + ibckg, + r=self.r, + subtraction=True, + event_idx=None, + mode='beads') + + def __repr__(self) -> str: + return f"{self.__class__.__name__}\ + (bckg_dir={self.bckg_dir}, mode={self.mode}, r={self.r})" + + +class Nuclear(torch.nn.Module): + def __init__(self, binE=3.): + super().__init__() + + self.binE = binE + + def forward(self, X): + X = X.detach().numpy() + nuclides = {'K40Th232': [1460, 2614], + 'U238': [609], + 'Bi214': [1764, 2204], + 'Pb214': [295, 352], + 'Ar41': [1294]} + nkey = np.random.choice(np.array(list(nuclides.keys()))) + for e in nuclides[nkey]: + chE = e/self.binE + roi = [int(max(chE-int(len(X)*0.01), 0)), + int(min(chE+int(len(X)*0.01), len(X)-1))] + auger = DANSE() + try: + X = auger.nuclear(roi, + X, + escape=False, + binE=self.binE, + subtract=False) + # ignore unsuccessful peak fits + except (RuntimeError, IndexError, ValueError): + continue + return X + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(binE={self.binE})" + + +class Resolution(torch.nn.Module): + def __init__(self, multiplier=(0.5, 1.5)): + super().__init__() + + if multiplier[0] <= 0 or multiplier[1] <= 0: + raise ValueError('{} must be positive.'.format(multiplier)) + self.multiplier = multiplier + + def forward(self, X): + X = X.detach().numpy() + auger = DANSE() + success = False + for i in range(100): + try: + roi = auger.find_res(X) + # ignore unsuccessful peak fits + except (RuntimeError, IndexError, ValueError): + success = False + continue + multiplier = loguniform.rvs(self.multiplier[0], + self.multiplier[1], + size=1) + conserve = np.random.choice([True, False]) + try: + X = auger.resolution(roi, + X.copy(), + multiplier=multiplier, + conserve=conserve) + success = True + # ignore unsuccessful peak fits + except (RuntimeError, IndexError, ValueError): + success = False + continue + if success: + break + if i == 99: + print('NOTE: resolution aug failed...') + return X + + def __repr__(self) -> str: + return f"{self.__class__.__name__}(multiplier={self.multiplier})" + + +class Mask(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, X): + X = X.detach().numpy() + auger = DANSE() + return auger.mask(X, + mode='block', + block=(0, np.random.randint(20, 100))) + + def __repr__(self) -> str: + return f"{self.__class__.__name__}" + + +class GainShift(torch.nn.Module): + def __init__(self): + super().__init__() + + def forward(self, X): + X = X.detach().numpy() + auger = DANSE() + + k = np.random.randint(-5, 5) + lam = np.random.uniform(-5, 5) + new, _ = auger.gain_shift(X, bins=None, lam=lam, k=k, mode='resample') + if len(new) < len(X): + new = np.append(new, np.repeat(0, 1000-len(new))) + return new[:len(X)] + + def __repr__(self) -> str: + return 
f"{self.__class__.__name__}" diff --git a/RadClass/scripts/utils.py b/RadClass/scripts/utils.py new file mode 100644 index 0000000..7d5b2c6 --- /dev/null +++ b/RadClass/scripts/utils.py @@ -0,0 +1,398 @@ +import numpy as np +import seaborn as sns +import matplotlib.pyplot as plt +import time +import joblib +import glob +# For hyperparameter optimization +from hyperopt import Trials, tpe, fmin, trials_from_docs +from functools import partial +# diagnostics +from sklearn.metrics import confusion_matrix +# pca +from sklearn.preprocessing import StandardScaler +from sklearn.decomposition import PCA +# Cross Validation +from sklearn.model_selection import KFold, StratifiedKFold + + +class EarlyStopper: + ''' + Early stopping mechanism for neural networks. + Code adapted from user "isle_of_gods" from StackOverflow: + https://stackoverflow.com/questions/71998978/early-stopping-in-pytorch + Use this class to break a training loop if the validation loss is low. + Inputs: + patience: integer; forces stop if validation loss has not improved + for some time + min_delta: "fudge value" for how much loss to tolerate before stopping + ''' + + def __init__(self, patience=1, min_delta=0): + self.patience = patience + self.min_delta = min_delta + self.counter = 0 + self.min_validation_loss = np.inf + + def early_stop(self, validation_loss): + ''' + Tests for the early stopping condition if the validation loss + has not improved for a certain period of time (patience). + Inputs: + validation_loss: typically a float value for the loss function of + a neural network training loop + ''' + + if validation_loss < self.min_validation_loss: + # keep track of the smallest validation loss + # if it has been beaten, restart patience + self.min_validation_loss = validation_loss + self.counter = 0 + elif validation_loss > (self.min_validation_loss + self.min_delta): + # keep track of whether validation loss has been decreasing + # by a tolerable amount + self.counter += 1 + if self.counter >= self.patience: + return True + return False + + +def run_hyperopt(space, model, data, testset, metric='loss', mode='min', + max_evals=50, verbose=True, trials=None): + ''' + Runs hyperparameter optimization on a model given a parameter space. + Inputs: + space: dictionary with each hyperparameter as keys and values being the + range of parameter space (see hyperopt docs for defining a space) + mode: function that takes params dictionary, trains a specified ML model + and returns the optimization loss function, model, and other + attributes (e.g. accuracy on evaluation set) + max_eval: (int) run hyperparameter optimization for max_val iterations + njobs: (int) number of hyperparameter training iterations to complete + in parallel. Default is 4, but personal computing resources may + require less or allow more. + verbose: report best and worse loss/accuracy + + Returns: + best: dictionary with returns from model function, including best loss, + best trained model, best parameters, etc. 
+ ''' + + # algo = HyperOptSearch(metric=metric, mode=mode) + # algo = ConcurrencyLimiter(algo, max_concurrent=njobs) + + if trials is None: + trials = Trials() + else: + trials = joblib.load(trials) + max_evals = len(trials)+1 + + # wrap data into objective function + fmin_objective = partial(model, data=data, testset=testset) + + # trainable = tune.with_resources( + # tune.with_parameters(model, data=data, testset=testset), + # {"cpu": num_workers+1}) + # run hyperopt + # tuner = tune.Tuner( + # trainable, + # param_space=space, + # # run_config=air.RunConfig(stop=TimeStopper()), + # tune_config=tune.TuneConfig(num_samples=max_evals, + # metric=metric, + # mode=mode, + # search_alg=algo), + # # time_budget_s=3600), + # ) + + # results = tuner.fit() + + fmin(fmin_objective, + space, + algo=tpe.suggest, + max_evals=max_evals, + trials=trials) + + # of all trials, find best and worst loss/accuracy from optimization + # if mode == 'min': + # worst_mode = 'max' + # else: + # worst_mode = 'min' + # best = results.get_best_result(metric=metric, mode=mode) + # worst = results.get_best_result(metric=metric, mode=worst_mode) + best = trials.results[np.argmin([r['loss'] for r in trials.results])] + worst = trials.results[np.argmax([r['loss'] for r in trials.results])] + # best_checkpoint = best.checkpoint + # best = best.metrics + # worst_checkpoint = worst.checkpoint + # worst = worst.metrics + + if verbose: + print('best metrics:') + print('\taccuracy:', best['accuracy']) + # print('\tprecision:', best['precision']) + # print('\trecall:', best['recall']) + # print('\tscore:', best['score']) + print('\tloss:', best['loss']) + print('\tparams:', best['params']) + # print('\tmodel:', best['model']) + + print('worst metrics:') + print('\taccuracy:', worst['accuracy']) + # print('\tprecision:', worst['precision']) + # print('\trecall:', worst['recall']) + # print('\tscore:', worst['score']) + print('\tloss:', worst['loss']) + print('\tparams:', worst['params']) + # print('\tmodel:', worst['model']) + + return best, worst, trials # , best_checkpoint, worst_checkpoint + + +def combine_trials(filenames, save=True): + ''' + Combine a group of hyperopt.Trials() files into + one file object. + filenames: str; path and filename to stored Trials object. + Use bash * casing for multiple objects. + e.g. "/home/user/*_trials.joblib" + ''' + + files = glob.glob(filenames) + trials = [] + for file in files: + trials = trials + list(joblib.load(file)[2]) + trials_merged = trials_from_docs(trials) + if save: + joblib.dump(trials_merged, './trials_merged.joblib') + return trials_merged + + +def cross_validation(model, X, y, params, n_splits=3, + stratified=False, random_state=None): + ''' + Perform K-Fold cross validation using sklearn and a given model. + The model *must* have a fresh_start method (see models in RadClass/models). + fresh_start() is used instead of train() to be agnostic to the data needed + for training (fresh_start requires a data_dict whereas each model's + train could take different combinations of labeled & unlabeled data). + This also avoids the need to do hyperparameter optimization (and + therefore many training epochs) for every K-Fold. + NOTE: fresh_start returns the model and results in a dictionary but + does not overwrite/save the model to the respective class. 
+ You can manually overwrite using model.model = return.model + Hyperparameter optimization (model.optimize) can be done before or after + cross validation to specify the (optimal) parameters used by the model + since they are required here. + NOTE: Fixed default to shuffle data during cross validation splits. + (See sklearn cross validation docs for more info.) + NOTE: Unlabeled data, if provided, will always be included in the training + dataset. This means that this cross validation implementation is + susceptible to bias in the unlabeled data distribution. To test for + this bias, a user can manually run cross validation as a parent to + calling this function, splitting the unlabeled data and adding + different folds into X. + Inputs: + model: ML model class object (e.g. RadClass/models). + Must have a fresh_start() method. + NOTE: If the model expects unlabeled data but unlabed data is not + provided in X/y, an error will likely be thrown when training the model + through fresh_start. + X: array of feature vectors (rows of individual instances, cols of vectors) + This should include all data for training and testing (since the + testing subset will be split by cross validation), including unlabeled + data if needed/used. + y: array/vector of labels for X. If including unlabeled data, use -1. + This should have the same order as X. That is, each row index in X + has an associated label with the same index in y. + params: dictionary of hyperparameters. Will depend on model used. + Alternatively, use model.params for models in RadClass/models + n_splits: int number of splits for K-Fold cross validation + stratified: bool; if True, balance the K-Folds to have roughly the same + proportion of samples from each class. + random_state: seed for reproducility. + ''' + + # return lists + accs = [] + reports = [] + + if stratified: + cv = StratifiedKFold(n_splits=n_splits, random_state=random_state, + shuffle=True) + else: + cv = KFold(n_splits=n_splits, random_state=random_state, + shuffle=True) + + # separate unlabeled data if included + Ux = None + Uy = None + if -1 in y: + U_idx = np.where(y == -1)[0] + L_idx = np.where(y != -1)[0] + Ux = X[U_idx] + Uy = y[U_idx] + Lx = X[L_idx] + Ly = y[L_idx] + else: + Lx = X + Ly = y + # conduct K-Fold cross validation + cv.get_n_splits(Lx, Ly) + for train_idx, test_idx in cv.split(Lx, Ly): + trainx, testx = Lx[train_idx], Lx[test_idx] + trainy, testy = Ly[train_idx], Ly[test_idx] + + # construct data dictionary for training in fresh_start + data_dict = {'trainx': trainx, 'trainy': trainy, + 'testx': testx, 'testy': testy} + if Ux is not None: + data_dict['Ux'] = Ux + data_dict['Uy'] = Uy + results = model.fresh_start(params, data_dict) + accs = np.append(accs, results['accuracy']) + reports = np.append(reports, results) + + # report cross validation results + print('Average accuracy:', np.mean(accs)) + print('Max accuracy:', np.max(accs)) + print('All accuracy:', accs) + # return the results of fresh_start for the max accuracy model + return reports[np.argmax(accs)] + + +def pca(Lx, Ly, Ux, Uy, filename): + ''' + A function for computing and plotting 2D PCA. + Inputs: + Lx: labeled feature data. + Ly: class labels for labeled data. + Ux: unlabeled feature data. + Uy: labels for unlabeled data (all labels should be -1). + filename: filename for saved plot. + The file must be saved with extension .joblib. + Added to filename if not included as input. 
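+        Example (sketch; the arrays below are illustrative placeholders):
+            pca(Lx, Ly, Ux, np.full(Ux.shape[0], -1), 'pca_plot')
+            # the 2D scatter plot is written to 'pca_plot.png'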
+ ''' + + plt.rcParams.update({'font.size': 20}) + # only saving colors for binary classification with unlabeled instances + col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} + + pcadata = np.append(Lx, Ux, axis=0) + normalizer = StandardScaler() + x = normalizer.fit_transform(pcadata) + print(np.mean(pcadata), np.std(pcadata)) + print(np.mean(x), np.std(x)) + + pca = PCA(n_components=2) + pca.fit_transform(x) + print(pca.explained_variance_ratio_) + print(pca.singular_values_) + print(pca.components_) + + principalComponents = pca.fit_transform(x) + + fig, ax = plt.subplots(figsize=(10, 8)) + ax.set_xlabel('Principal Component 1', fontsize=15) + ax.set_ylabel('Principal Component 2', fontsize=15) + for idx, color in col_dict.items(): + indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] + ax.scatter(principalComponents[indices, 0], + principalComponents[indices, 1], + c=color, + label='class '+str(idx)) + ax.grid() + ax.legend() + + if filename[-4:] != '.png': + filename += '.png' + fig.tight_layout() + fig.savefig(filename) + + +def multiD_pca(Lx, Ly, Ux, Uy, filename, n=2): + ''' + A function for computing and plotting n-dimensional PCA. + Inputs: + Lx: labeled feature data. + Ly: class labels for labeled data. + Ux: unlabeled feature data. + Uy: labels for unlabeled data (all labels should be -1). + filename: filename for saved plot. + The file must be saved with extension .joblib. + Added to filename if not included as input. + n: number of singular values to include in PCA analysis. + ''' + + plt.rcParams.update({'font.size': 20}) + # only saving colors for binary classification with unlabeled instances + col_dict = {-1: 'tab:gray', 0: 'tab:orange', 1: 'tab:blue'} + + pcadata = np.append(Lx, Ux, axis=0) + normalizer = StandardScaler() + x = normalizer.fit_transform(pcadata) + print(np.mean(pcadata), np.std(pcadata)) + print(np.mean(x), np.std(x)) + + n = 2 + pca = PCA(n_components=n) + principalComponents = pca.fit_transform(x) + print(pca.explained_variance_ratio_) + print(pca.singular_values_) + print(pca.components_) + + alph = ["A", "B", "C", "D", "E", "F", "G", "H", + "I", "J", "K", "L", "M", "N", "O", "P", + "Q", "R", "S", "T", "U", "V", "W", "X", + "Y", "Z"] + jobs = alph[:n] + + fig, axes = plt.subplots(n, n, figsize=(15, 15)) + + for row in range(axes.shape[0]): + for col in range(axes.shape[1]): + ax = axes[row, col] + if row == col: + ax.tick_params( + axis='both', which='both', + bottom='off', top='off', + labelbottom='off', + left='off', right='off', + labelleft='off' + ) + ax.text(0.5, 0.5, jobs[row], horizontalalignment='center') + else: + for idx, color in col_dict.items(): + indices = np.where(np.append(Ly, Uy, axis=0) == idx)[0] + ax.scatter(principalComponents[indices, row], + principalComponents[indices, col], + c=color, + label='class '+str(idx)) + fig.tight_layout() + if filename[-4:] != '.png': + filename += '.png' + fig.savefig(filename) + + +def plot_cf(testy, predy, title, filename): + ''' + Uses sklearn metric to compute a confusion matrix for visualization + Inputs: + testy: array/vector with ground-truth labels for test/evaluation set + predy: array/vector with predicted sample labels from trained model + title: string title for plot + filename: string with extension for confusion matrix file + ''' + + cf_matrix = confusion_matrix(testy, predy) + ax = sns.heatmap(cf_matrix, annot=True, cmap='Blues') + + ax.set_title(title) + ax.set_xlabel('\nPredicted Values') + ax.set_ylabel('Actual Values ') + + # Ticket labels - List must be in 
alphabetical order + ax.xaxis.set_ticklabels(['0(SNM)', '1(other)']) + ax.yaxis.set_ticklabels(['0(SNM)', '1(other)']) + # Save the visualization of the Confusion Matrix. + plt.savefig(filename) diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/contrastive-environment.yml b/contrastive-environment.yml new file mode 100644 index 0000000..9e7024f --- /dev/null +++ b/contrastive-environment.yml @@ -0,0 +1,248 @@ +name: contrastive +channels: + - pytorch + - nvidia + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=main + - _openmp_mutex=5.1=1_gnu + - abseil-cpp=20211102.0=hd4dd3e8_0 + - absl-py=1.4.0=py311h06a4308_0 + - aiohttp=3.8.3=py311h5eee18b_0 + - aiosignal=1.2.0=pyhd3eb1b0_0 + - appdirs=1.4.4=pyhd3eb1b0_0 + - asttokens=2.2.1=pyhd8ed1ab_0 + - async-timeout=4.0.2=py311h06a4308_0 + - attrs=22.1.0=py311h06a4308_0 + - backcall=0.2.0=pyh9f0ad1d_0 + - backports=1.0=pyhd8ed1ab_3 + - backports.functools_lru_cache=1.6.5=pyhd8ed1ab_0 + - blas=1.0=mkl + - blinker=1.4=py311h06a4308_0 + - blosc=1.21.3=h6a678d5_0 + - bottleneck=1.3.5=py311hbed6279_0 + - brotli=1.0.9=h5eee18b_7 + - brotli-bin=1.0.9=h5eee18b_7 + - brotlipy=0.7.0=py311h5eee18b_1002 + - bzip2=1.0.8=h7b6447c_0 + - c-ares=1.19.0=h5eee18b_0 + - c-blosc2=2.10.5=h80c7b02_0 + - ca-certificates=2023.11.17=hbcca054_0 + - cachetools=4.2.2=pyhd3eb1b0_0 + - certifi=2023.11.17=py311h06a4308_0 + - cffi=1.15.1=py311h5eee18b_3 + - charset-normalizer=2.0.4=pyhd3eb1b0_0 + - click=8.0.4=py311h06a4308_0 + - comm=0.1.4=pyhd8ed1ab_0 + - contourpy=1.0.5=py311hdb19cb5_0 + - cryptography=41.0.2=py311h22a60cf_0 + - cuda-cudart=11.7.99=0 + - cuda-cupti=11.7.101=0 + - cuda-libraries=11.7.1=0 + - cuda-nvrtc=11.7.99=0 + - cuda-nvtx=11.7.91=0 + - cuda-runtime=11.7.1=0 + - cycler=0.11.0=pyhd3eb1b0_0 + - dbus=1.13.18=hb2f20db_0 + - debugpy=1.6.7=py311h6a678d5_0 + - decorator=5.1.1=pyhd8ed1ab_0 + - executing=1.2.0=pyhd8ed1ab_0 + - expat=2.4.9=h6a678d5_0 + - ffmpeg=4.3=hf484d3e_0 + - filelock=3.9.0=py311h06a4308_0 + - fontconfig=2.14.1=h52c9d5c_1 + - fonttools=4.25.0=pyhd3eb1b0_0 + - freetype=2.12.1=h4a9f257_0 + - frozenlist=1.3.3=py311h5eee18b_0 + - giflib=5.2.1=h5eee18b_3 + - glib=2.69.1=he621ea3_2 + - gmp=6.2.1=h295c915_3 + - gmpy2=2.1.2=py311hc9b5ff0_0 + - gnutls=3.6.15=he1e5248_0 + - google-auth=2.6.0=pyhd3eb1b0_0 + - google-auth-oauthlib=0.5.2=py311h06a4308_0 + - grpc-cpp=1.48.2=he1ff14a_1 + - grpcio=1.48.2=py311he1ff14a_1 + - gst-plugins-base=1.14.1=h6a678d5_1 + - gstreamer=1.14.1=h5eee18b_1 + - h5py=3.9.0=py311hdd6beaf_0 + - hdf5=1.12.1=h2b7332f_3 + - icu=58.2=he6710b0_3 + - idna=3.4=py311h06a4308_0 + - importlib-metadata=6.8.0=pyha770c72_0 + - importlib_metadata=6.8.0=hd8ed1ab_0 + - intel-openmp=2023.1.0=hdb19cb5_46305 + - ipykernel=6.25.0=pyh71e2992_0 + - ipython=8.14.0=pyh41d4057_0 + - jedi=0.19.0=pyhd8ed1ab_0 + - jinja2=3.1.2=py311h06a4308_0 + - joblib=1.2.0=py311h06a4308_0 + - jpeg=9e=h5eee18b_1 + - jupyter_client=8.3.0=pyhd8ed1ab_0 + - jupyter_core=4.12.0=py311h38be061_0 + - kiwisolver=1.4.4=py311h6a678d5_0 + - krb5=1.20.1=h143b758_1 + - lame=3.100=h7b6447c_0 + - lcms2=2.12=h3be6417_0 + - ld_impl_linux-64=2.38=h1181459_1 + - lerc=3.0=h295c915_0 + - libbrotlicommon=1.0.9=h5eee18b_7 + - libbrotlidec=1.0.9=h5eee18b_7 + - libbrotlienc=1.0.9=h5eee18b_7 + - libclang=10.0.1=default_hb85057a_2 + - libcublas=11.10.3.66=0 + - libcufft=10.7.2.124=h4fbf590_0 + - libcufile=1.7.1.12=0 + - libcurand=10.3.3.129=0 + - libcurl=8.2.1=h251f7ec_0 + - libcusolver=11.4.0.1=0 + - libcusparse=11.7.4.91=0 + - 
libdeflate=1.17=h5eee18b_0 + - libedit=3.1.20221030=h5eee18b_0 + - libev=4.33=h7f8727e_1 + - libevent=2.1.12=hdbd6064_1 + - libffi=3.4.4=h6a678d5_0 + - libgcc-ng=11.2.0=h1234567_1 + - libgfortran-ng=11.2.0=h00389a5_1 + - libgfortran5=11.2.0=h1234567_1 + - libgomp=11.2.0=h1234567_1 + - libiconv=1.16=h7f8727e_2 + - libidn2=2.3.4=h5eee18b_0 + - libllvm10=10.0.1=hbcb73fb_5 + - libnghttp2=1.52.0=h2d74bed_1 + - libnpp=11.7.4.75=0 + - libnvjpeg=11.8.0.2=0 + - libpng=1.6.39=h5eee18b_0 + - libpq=12.15=hdbd6064_1 + - libprotobuf=3.20.3=he621ea3_0 + - libsodium=1.0.18=h36c2ea0_1 + - libssh2=1.10.0=hdbd6064_2 + - libstdcxx-ng=11.2.0=h1234567_1 + - libtasn1=4.19.0=h5eee18b_0 + - libtiff=4.5.0=h6a678d5_2 + - libunistring=0.9.10=h27cfd23_0 + - libuuid=1.41.5=h5eee18b_0 + - libwebp=1.2.4=h11a3e52_1 + - libwebp-base=1.2.4=h5eee18b_1 + - libxcb=1.15=h7f8727e_0 + - libxkbcommon=1.0.1=hfa300c1_0 + - libxml2=2.9.14=h74e7548_0 + - libxslt=1.1.35=h4e12654_0 + - lightning-utilities=0.10.0=pyhd8ed1ab_0 + - lz4-c=1.9.4=h6a678d5_0 + - lzo=2.10=h7b6447c_2 + - markdown=3.4.1=py311h06a4308_0 + - markupsafe=2.1.1=py311h5eee18b_0 + - matplotlib=3.7.1=py311h06a4308_1 + - matplotlib-base=3.7.1=py311ha02d727_1 + - matplotlib-inline=0.1.6=pyhd8ed1ab_0 + - mkl=2023.1.0=h6d00ec8_46342 + - mkl-service=2.4.0=py311h5eee18b_1 + - mkl_fft=1.3.6=py311ha02d727_1 + - mkl_random=1.2.2=py311ha02d727_1 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.2=hb69a4c5_1 + - mpmath=1.3.0=py311h06a4308_0 + - multidict=6.0.2=py311h5eee18b_0 + - munkres=1.1.4=py_0 + - ncurses=6.4=h6a678d5_0 + - nest-asyncio=1.5.6=pyhd8ed1ab_0 + - nettle=3.7.3=hbbd107a_1 + - networkx=3.1=py311h06a4308_0 + - nspr=4.35=h6a678d5_0 + - nss=3.89.1=h6a678d5_0 + - numexpr=2.8.4=py311h65dcdc2_1 + - numpy=1.25.0=py311h08b1b3b_0 + - numpy-base=1.25.0=py311hf175353_0 + - oauthlib=3.2.2=py311h06a4308_0 + - openh264=2.1.1=h4ff587b_0 + - openssl=3.0.12=h7f8727e_0 + - packaging=23.0=py311h06a4308_0 + - pandas=1.5.3=py311hba01205_0 + - parso=0.8.3=pyhd8ed1ab_0 + - pcre=8.45=h295c915_0 + - pexpect=4.8.0=pyh1a96a4e_2 + - pickleshare=0.7.5=py_1003 + - pillow=9.4.0=py311h6a678d5_0 + - pip=23.2.1=py311h06a4308_0 + - ply=3.11=py311h06a4308_0 + - pooch=1.4.0=pyhd3eb1b0_0 + - prompt-toolkit=3.0.39=pyha770c72_0 + - prompt_toolkit=3.0.39=hd8ed1ab_0 + - protobuf=3.20.3=py311h6a678d5_0 + - psutil=5.9.0=py311h5eee18b_0 + - ptyprocess=0.7.0=pyhd3deb0d_0 + - pure_eval=0.2.2=pyhd8ed1ab_0 + - py-cpuinfo=9.0.0=py311h06a4308_0 + - pyasn1=0.4.8=pyhd3eb1b0_0 + - pyasn1-modules=0.2.8=py_0 + - pycparser=2.21=pyhd3eb1b0_0 + - pygments=2.15.1=pyhd8ed1ab_0 + - pyjwt=2.4.0=py311h06a4308_0 + - pyopenssl=23.2.0=py311h06a4308_0 + - pyparsing=3.0.9=py311h06a4308_0 + - pyqt=5.15.7=py311h6a678d5_0 + - pyqt5-sip=12.11.0=py311h6a678d5_0 + - pysocks=1.7.1=py311h06a4308_0 + - pytables=3.8.0=py311hb8ae3fc_3 + - python=3.11.4=h955ad1f_0 + - python-dateutil=2.8.2=pyhd3eb1b0_0 + - python_abi=3.11=2_cp311 + - pytorch=2.0.1=py3.11_cuda11.7_cudnn8.5.0_0 + - pytorch-cuda=11.7=h778d358_5 + - pytorch-mutex=1.0=cuda + - pytz=2022.7=py311h06a4308_0 + - pyzmq=25.1.0=py311h6a678d5_0 + - qt-main=5.15.2=h327a75a_7 + - qt-webengine=5.15.9=hd2b0992_4 + - qtwebkit=5.212=h4eab89a_4 + - re2=2022.04.01=h295c915_0 + - readline=8.2=h5eee18b_0 + - requests=2.31.0=py311h06a4308_0 + - requests-oauthlib=1.3.0=py_0 + - rsa=4.7.2=pyhd3eb1b0_1 + - scikit-learn=1.2.2=py311h6a678d5_1 + - scipy=1.10.1=py311h08b1b3b_1 + - seaborn=0.12.2=py311h06a4308_0 + - setuptools=68.0.0=py311h06a4308_0 + - sip=6.6.2=py311h6a678d5_0 + - six=1.16.0=pyhd3eb1b0_1 + - 
sqlite=3.41.2=h5eee18b_0 + - stack_data=0.6.2=pyhd8ed1ab_0 + - sympy=1.11.1=py311h06a4308_0 + - tbb=2021.8.0=hdb19cb5_0 + - tensorboard=2.12.1=py311h06a4308_0 + - tensorboard-data-server=0.7.0=py311h52d8a92_0 + - tensorboard-plugin-wit=1.6.0=py_0 + - threadpoolctl=2.2.0=pyh0d69192_0 + - tk=8.6.12=h1ccaba5_0 + - toml=0.10.2=pyhd3eb1b0_0 + - torchaudio=2.0.2=py311_cu117 + - torchmetrics=1.2.1=pyhd8ed1ab_0 + - torchtriton=2.0.0=py311 + - torchvision=0.15.2=py311_cu117 + - tornado=6.3.2=py311h5eee18b_0 + - tqdm=4.65.0=py311h92b7b1e_0 + - traitlets=5.9.0=pyhd8ed1ab_0 + - typing_extensions=4.7.1=py311h06a4308_0 + - tzdata=2023c=h04d1e81_0 + - urllib3=1.26.16=py311h06a4308_0 + - wcwidth=0.2.6=pyhd8ed1ab_0 + - werkzeug=2.2.3=py311h06a4308_0 + - wheel=0.38.4=py311h06a4308_0 + - xz=5.4.2=h5eee18b_0 + - yarl=1.8.1=py311h5eee18b_0 + - zeromq=4.3.4=h9c3ff4c_1 + - zipp=3.16.2=pyhd8ed1ab_0 + - zlib=1.2.13=h5eee18b_0 + - zlib-ng=2.0.7=h5eee18b_0 + - zstd=1.5.5=hc292b87_0 + - pip: + - captum==0.6.0 + - cloudpickle==2.2.1 + - future==0.18.3 + - hyperopt==0.2.7 + - py4j==0.10.9.7 + - pytorch-metric-learning==2.3.0 +prefix: /home/stomps/miniconda3/envs/contrastive diff --git a/requirements.txt b/requirements.txt index 06d1c3a..9ad78c2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,11 @@ numpy h5py progressbar2 scipy>=1.7.0 +scikit-learn +hyperopt +matplotlib +seaborn +joblib +ray[tune] +torch +shadow-ssml diff --git a/tests/test_BackgroundEstimator.py b/tests/test_BackgroundEstimator.py index 2d10c89..efc1299 100644 --- a/tests/test_BackgroundEstimator.py +++ b/tests/test_BackgroundEstimator.py @@ -77,7 +77,6 @@ def test_write(): bckg.write(ofilename=ofilename) results = np.loadtxt(fname=ofilename+'.csv', delimiter=',') - print(results) # the resulting observation should be: # counts * integration / live-time diff --git a/tests/test_models.py b/tests/test_models.py new file mode 100644 index 0000000..6ce3a29 --- /dev/null +++ b/tests/test_models.py @@ -0,0 +1,466 @@ +# diagnostics +import numpy as np +from datetime import datetime, timedelta +# testing models +from sklearn.model_selection import train_test_split +from sklearn.preprocessing import StandardScaler +import tests.test_data as test_data +# hyperopt +from ray import tune +# from hyperopt.pyll.base import scope +# from hyperopt import hp +# testing utils +import scripts.utils as utils +# models +from models.LogReg import LogReg +from models.SSML.CoTraining import CoTraining +from models.SSML.LabelProp import LabelProp +from models.SSML.ShadowNN import ShadowNN +from models.SSML.ShadowCNN import ShadowCNN +# testing write +import joblib +import os + +# initialize sample data +start_date = datetime(2019, 2, 2) +delta = timedelta(seconds=1) +timestamps = np.arange(start_date, + start_date + (test_data.timesteps * delta), + delta).astype('datetime64[s]').astype('float64') + +live = np.full((len(timestamps),), test_data.livetime) +sample_val = 1.0 +spectra = np.full((len(timestamps), test_data.energy_bins), + np.full((1, test_data.energy_bins), sample_val)) +# setting up for rejected null hypothesis +rejected_H0_time = np.random.choice(spectra.shape[0], + test_data.timesteps//2, + replace=False) +spectra[rejected_H0_time] = 100.0 + +labels = np.full((spectra.shape[0],), 0) +labels[rejected_H0_time] = 1 + + +def test_utils(): + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + Uy = np.full_like(Uy, -1) + + # test cross validation for supervised data using LogReg + params = {'max_iter': 2022, 'tol': 
0.5, 'C': 5.0} + model = LogReg(params=params) + max_acc_model = utils.cross_validation(model=model, + X=X, + y=y, + params=params) + assert max_acc_model['accuracy'] >= 0.5 + + # test cross validation for supervised data and StratifiedKFold with LogReg + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = LogReg(params=params) + max_acc_model = utils.cross_validation(model=model, + X=X, + y=y, + params=params, + stratified=True) + assert max_acc_model['accuracy'] >= 0.5 + + # test cross validation for SSML with LabelProp + params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} + model = LabelProp(params=params) + max_acc_model = utils.cross_validation(model=model, + X=np.append(X, Ux, axis=0), + y=np.append(y, Uy, axis=0), + params=params, + stratified=True) + assert max_acc_model['accuracy'] >= 0.5 + + # data split for data visualization + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + filename = 'test_pca' + utils.pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename) + os.remove(filename+'.png') + + filename = 'test_multiD_pca' + utils.multiD_pca(X_train, y_train, Ux, np.full_like(Uy, -1), filename, n=5) + os.remove(filename+'.png') + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + + # default behavior + model = LogReg(params=None, random_state=0) + model.train(X_train, y_train) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + filename = 'test_cf' + utils.plot_cf(y_test, pred, title=filename, filename=filename) + os.remove(filename+'.png') + + +def test_LogReg(): + # test saving model input parameters + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = LogReg(params=params) + + assert model.model.max_iter == params['max_iter'] + assert model.model.tol == params['tol'] + assert model.model.C == params['C'] + + X_train, X_test, y_train, y_test = train_test_split(spectra, + labels, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + + # default behavior + model = LogReg(params=None, random_state=0) + model.train(X_train, y_train) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + assert acc > 0.7 + np.testing.assert_equal(pred, y_test) + + # testing hyperopt optimize methods + space = {'max_iter': tune.qrandint(10, 10000, 10), + 'tol': tune.loguniform(1e-5, 1e-1), + 'C': tune.uniform(0.001, 1000.0) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_CoTraining(): + # test saving model input parameters + params = {'max_iter': 2022, 'tol': 0.5, 'C': 5.0} + model = CoTraining(params=params) + + assert model.model1.max_iter == params['max_iter'] + assert model.model1.tol == params['tol'] + assert model.model1.C == params['C'] + + assert model.model2.max_iter == params['max_iter'] + assert model.model2.tol == params['tol'] + assert model.model2.C == params['C'] 
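+    # The split below holds out half of the synthetic spectra as an
+    # unlabeled pool (Ux) for co-training; its labels (Uy) are not used
+    # in this test.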
+ + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + # default behavior + model = CoTraining(params=None, random_state=0) + model.train(X_train, y_train, Ux) + + # testing train and predict methods + pred, acc, *_ = model.predict(X_test, y_test) + + assert acc > 0.7 + np.testing.assert_equal(pred, y_test) + + # testing hyperopt optimize methods + space = {'max_iter': tune.qrandint(10, 10000, 10), + 'tol': tune.loguniform(1e-5, 1e-3), + 'C': tune.uniform(1.0, 1000.0), + 'n_samples': tune.qrandint(1, 20, 1), + 'seed': 0 + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + + # testing model plotting method + filename = 'test_plot' + model.plot_cotraining(model1_accs=model.best['model1_acc_history'], + model2_accs=model.best['model2_acc_history'], + filename=filename) + os.remove(filename+'.png') + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_LabelProp(): + # test saving model input parameters + params = {'gamma': 10, 'n_neighbors': 15, 'max_iter': 2022, 'tol': 0.5} + model = LabelProp(params=params) + + assert model.model.gamma == params['gamma'] + assert model.model.n_neighbors == params['n_neighbors'] + assert model.model.max_iter == params['max_iter'] + assert model.model.tol == params['tol'] + + # there should be no normalization on LabelProp data + # since it depends on the distances between samples + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # default behavior + model = LabelProp(params=None, random_state=0) + model.train(X_train, y_train, Ux) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + # the default n_neighbors(=7) from sklearn is too large + # for the size of this dataset + # therefore the accuracy is expected to be poor + # a better value for this dataset would be n_neighbors=2 + # (tested when specifying params in LabelProp.__init__) + assert acc >= 0.5 + # uninteresting test if LabelProp predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = {'max_iter': tune.qrandint(10, 10000, 10), + 'tol': tune.loguniform(1e-6, 1e-4), + 'gamma': tune.uniform(1, 50), + 'n_neighbors': tune.qrandint(1, X_train.shape[0], 1) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def 
test_ShadowNN(): + # check default parameter settings + model = ShadowNN() + assert model.params == {'binning': 1} + assert model.eaat is not None + assert model.eaat_opt is not None + assert model.xEnt is not None + assert model.input_length == 1000 + + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + params = {'hidden_layer': 10, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': 20} + # default behavior + model = ShadowNN(params=params, random_state=0) + acc_history = model.train(X_train, y_train, Ux, X_test, y_test) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + # test for agreement between training and testing + # (since the same data is used for diagnostics in this test) + assert acc_history[-1] == acc + + # Shadow/PyTorch reports accuracies as percentages + # rather than decimals + # uninteresting test if Shadow predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = {'hidden_layer': 10, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': tune.qrandint(10, 20, 1) + } + data_dict = {'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext) + + +def test_ShadowCNN(): + # check default parameter settings + model = ShadowCNN() + assert model.params == {'binning': 1, 'batch_size': 1} + assert model.model is not None + assert model.eaat is not None + assert model.optimizer is not None + + X, Ux, y, Uy = train_test_split(spectra, + labels, + test_size=0.5, + random_state=0) + X_train, X_test, y_train, y_test = train_test_split(X, + y, + test_size=0.2, + random_state=0) + + # normalization + normalizer = StandardScaler() + normalizer.fit(X_train) + + X_train = normalizer.transform(X_train) + X_test = normalizer.transform(X_test) + Ux = normalizer.transform(Ux) + + params = {'layer1': 2, + 'kernel': 3, + 'alpha': 0.1, + 'xi': 1e-3, + 'eps': 1.0, + 'lr': 0.1, + 'momentum': 0.9, + 'binning': 20, + 'batch_size': 4, + 'drop_rate': 0.1} + + # default behavior + model = ShadowCNN(params=params, random_state=0) + losscurve, evalcurve = model.train(X_train, y_train, Ux, X_test, y_test) + + # testing train and predict methods + pred, acc = model.predict(X_test, y_test) + + # test for agreement between training and testing + # (since the same data is used for diagnostics in this test) + assert evalcurve[-1] == acc + + # Shadow/PyTorch reports accuracies as percentages + # rather than decimals + # uninteresting test if Shadow predicts all one class + # TODO: make the default params test meaningful + assert np.count_nonzero(pred == y_test) > 0 + + # testing hyperopt optimize methods + space = params + space['binning'] = tune.qrandint(10, 20, 1) + data_dict = 
{'trainx': X_train, + 'testx': X_test, + 'trainy': y_train, + 'testy': y_test, + 'Ux': Ux + } + model.optimize(space, data_dict, max_evals=2, verbose=True) + + assert model.best['accuracy'] >= model.worst['accuracy'] + + # testing model plotting method + filename = 'test_plot' + model.plot_training(losscurve=model.best['losscurve'], + evalcurve=model.best['evalcurve'], + filename=filename) + os.remove(filename+'.png') + + # testing model write to file method + filename = 'test_LogReg' + ext = '.joblib' + model.save(filename) + model_file = joblib.load(filename+ext) + assert model_file.best['params'] == model.best['params'] + + os.remove(filename+ext)
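+
+# Usage note (a sketch, assuming the repository root as the working
+# directory): the tests in this file are standard pytest functions and can
+# be run selectively, e.g.
+#     python3 -m pytest tests/test_models.py -k "LogReg or CoTraining"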