diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 528da84f..0565b82e 100755
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
- rev: v3.3.0
+ rev: v4.5.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
@@ -8,13 +8,13 @@ repos:
- id: trailing-whitespace
exclude: .ipynb_checkpoints|data/Gaia_hp8_densitymap.fits
- repo: https://github.com/python/black
- rev: 22.3.0
+ rev: 24.2.0
hooks:
- id: black
pass_filenames: true
exclude: .ipynb_checkpoints|data|^.fits
- repo: https://github.com/pycqa/flake8
- rev: 3.8.4
+ rev: 7.0.0
hooks:
- id: flake8
pass_filenames: true
diff --git a/.requirements/dev.txt b/.requirements/dev.txt
deleted file mode 100644
index b663b35f..00000000
--- a/.requirements/dev.txt
+++ /dev/null
@@ -1,9 +0,0 @@
-deepdiff>=5.0
-gsutil>=4.60
-keras-tuner>=1.0.2
-matplotlib>=3.3
-pytest>=6.1.2
-questionary>=1.8.1
-scikit-learn>=0.24.1
-tensorflow>=2.14.0
-wandb>=0.12.1
diff --git a/.requirements/doc.txt b/.requirements/doc.txt
deleted file mode 100644
index 83f9f419..00000000
--- a/.requirements/doc.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-h5py>=3.10.0
-astropy>=5.2.2
-fast-histogram>=0.11
-fire>=0.4.0
-healpy>=1.16.2
-Jinja2<=3.1
-myst-parser>=0.18.1
-pandas>=1.2
-penquins>=2.3.1
-pre-commit>=3.2.2
-pyyaml>=5.3.1
-sphinx>=4.2
-sphinx_press_theme>=0.8.0
-tdtax>=0.1.6
-tables>=3.7
-pyarrow>=9.0.0
-numba>=0.56.4
-numpy>=1.23,<1.24
-cesium>=0.11.1
-xgboost>=1.7.5
-seaborn>=0.12.2
-pydot>=1.4.2
-jupyter>=1.0.0
diff --git a/README.md b/README.md
index 22778d6d..2d22b201 100644
--- a/README.md
+++ b/README.md
@@ -1,13 +1,14 @@
-# SCoPe: ZTF source classification project
+# SCoPe: ZTF Source Classification Project
[![arXiv](https://img.shields.io/badge/arXiv-2102.11304-brightgreen)](https://arxiv.org/abs/2102.11304)
[![arXiv](https://img.shields.io/badge/arXiv-2009.14071-brightgreen)](https://arxiv.org/abs/2009.14071)
+[![arXiv](https://img.shields.io/badge/arXiv-2312.00143-brightgreen)](https://arxiv.org/abs/2312.00143)
-The documentation is hosted at [https://zwickytransientfacility.github.io/scope-docs/](https://zwickytransientfacility.github.io/scope-docs/). To generate HTML files of the documentation locally, run `./scope.py doc`
+`scope-ml` uses machine learning to classify light curves from the Zwicky Transient Facility ([ZTF](https://www.ztf.caltech.edu)). The documentation is hosted at [https://zwickytransientfacility.github.io/scope-docs/](https://zwickytransientfacility.github.io/scope-docs/). To generate HTML files of the documentation locally, clone the repository and run `scope-doc` after installing.
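+
+A minimal sketch of a local documentation build (assumes the repository has been cloned into `scope`):
+
+```
+pip install scope-ml
+cd scope
+scope-doc
+```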
## Funding
We gratefully acknowledge previous and current support from the U.S. National Science Foundation (NSF) Harnessing the Data Revolution (HDR) Institute for Accelerated AI Algorithms for Data-Driven Discovery (A3D3) under Cooperative Agreement No. PHY-2117997.
-
-
+
+
diff --git a/tools/SCoPe_data_analysis_plots.ipynb b/SCoPe_data_analysis_plots.ipynb
similarity index 100%
rename from tools/SCoPe_data_analysis_plots.ipynb
rename to SCoPe_data_analysis_plots.ipynb
diff --git a/config.defaults.yaml b/config.defaults.yaml
index e4b25239..909921fa 100644
--- a/config.defaults.yaml
+++ b/config.defaults.yaml
@@ -1731,6 +1731,15 @@ training:
eval_metric: 'auc'
early_stopping_rounds: 10
num_boost_round: 999
+ plot_params:
+ cm_include_count: False
+ cm_include_percent: True
+ annotate_scores: False
+ dnn:
+ dense_branch: True
+ conv_branch: True
+ loss: 'binary_crossentropy'
+ optimizer: 'adam'
classes:
# phenomenological classes
vnv:
diff --git a/dev-requirements.txt b/dev-requirements.txt
new file mode 100644
index 00000000..ae9ca6ff
--- /dev/null
+++ b/dev-requirements.txt
@@ -0,0 +1,5 @@
+pytest>=6.1.2
+pre-commit>=3.5.0
+sphinx>=4.2
+sphinx_press_theme>=0.8.0
+poetry>=1.7.1
diff --git a/doc/developer.md b/doc/developer.md
index 6957058f..68b18156 100644
--- a/doc/developer.md
+++ b/doc/developer.md
@@ -1,6 +1,23 @@
# Installation/Developer Guidelines
-## Initial steps
+## Science users
+- Create and activate a virtual/conda environment with Python 3.11, e.g.:
+ ```shell script
+ conda create -n scope-env python=3.11
+ conda activate scope-env
+ ```
+- Install the latest release of `scope-ml` from PyPI:
+ ```shell script
+ pip install scope-ml
+ ```
+- In the directory of your choice, run the initialization script. This will create the required directories and copy the necessary files to run the code:
+ ```shell script
+ scope-initialize
+ ```
+- Change directories to `scope` and modify `config.yaml` to finish the initialization process. This config file is used by default when running all scripts. You can also specify another config file using the `--config-path` argument.
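+
+Putting these steps together, a minimal sketch of the workflow (the directory and environment names below are just examples):
+```shell script
+conda create -n scope-env python=3.11
+conda activate scope-env
+pip install scope-ml
+mkdir scope_run && cd scope_run
+scope-initialize
+cd scope
+# edit config.yaml (e.g. tokens, training set path), then verify the installation:
+scope-test-limited
+```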
+
+
+## Developers/contributors
-- Create your own fork the [scope repository](https://github.com/ZwickyTransientFacility/scope) by clicking the "fork" button. Then, decide whether you would like to use HTTPS (easier for beginners) or SSH.
+- Create your own fork of the [scope repository](https://github.com/ZwickyTransientFacility/scope) by clicking the "fork" button. Then, decide whether you would like to use HTTPS (easier for beginners) or SSH.
- Following one set of instructions below, clone (download) your copy of the repository, and set up a remote called `upstream` that points to the main `scope` repository.
@@ -21,9 +38,9 @@ git clone git@github.com:/scope.git && cd scope
git remote add upstream git@github.com:ZwickyTransientFacility/scope.git
```
-## Setting up your environment (Windows/Linux/macOS)
+### Setting up your environment (Windows/Linux/macOS)
-### Use a package manager for installation
+#### Use a package manager for installation
We currently recommend running `scope` with Python 3.11. You may want to begin your installation by creating/activating a virtual environment, for example using conda. We specifically recommend installing miniforge3 (https://github.com/conda-forge/miniforge).
@@ -34,9 +51,9 @@ conda create -n scope-env -c conda-forge python=3.11
conda activate scope-env
```
-### Update your `PYTHONPATH`
+#### (Optional): Update your `PYTHONPATH`
-Ensure that Python can import from `scope` by modifying the `PYTHONPATH` environment variable. Use a simple text editor like `nano` to modify the appropriate file (depending on which shell you are using). For example, if using bash, run `nano ~/.bash_profile` and add the following line:
+If you plan to import from `scope` in your own code, ensure that Python can find it by adding the repository to the `PYTHONPATH` environment variable. Use a simple text editor like `nano` to modify the appropriate file (depending on which shell you are using). For example, if using bash, run `nano ~/.bash_profile` and add the following line:
```bash
export PYTHONPATH="$PYTHONPATH:$HOME/scope"
@@ -44,13 +61,20 @@ export PYTHONPATH="$PYTHONPATH:$HOME/scope"
Save the updated file (`Ctrl+O` in `nano`) and close/reopen your terminal for this change to be recognized. Then `cd` back into scope and activate your `scope-env` again.
-### Install pre-commit
+### Install required packages
+
+Ensure you are in the `scope` directory that contains `pyproject.toml`. Then, install the required python packages by running:
+```bash
+pip install .
+```
+
+#### Install dev requirements, pre-commit hook
We use `black` to format the code and `flake8` to verify that code complies with [PEP8](https://www.python.org/dev/peps/pep-0008/).
-Please install our pre-commit hook as follows:
+Please install our dev requirements and pre-commit hook as follows:
```shell script
-pip install pre-commit
+pip install -r dev-requirements.txt
pre-commit install
```
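+
+To lint the entire tree manually (rather than only staged changes), you can also run:
+```shell script
+pre-commit run --all-files
+```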
@@ -60,14 +84,7 @@ code.
The pre-commit hook will lint *changes* made to the source.
-## Install required packages
-
-Install the required python packages by running:
-```bash
-pip install -r requirements.txt
-```
-
-### Create and modify config.yaml
+#### Create and modify config.yaml
From the included config.defaults.yaml, make a copy called config.yaml:
@@ -77,14 +94,15 @@ cp config.defaults.yaml config.yaml
Edit config.yaml to include Kowalski instance and Fritz tokens in the associated empty `token:` fields.
-### Testing
-Run `./scope.py test` to test your installation. Note that for the test to pass, you will need access to the Kowalski database. If you do not have Kowalski access, you can run `./scope.py test_limited` to run a more limited (but still useful) set of tests.
+#### Testing
+Run `scope-test` to test your installation. Note that for the test to pass, you will need access to the Kowalski database. If you do not have Kowalski access, you can run `scope-test-limited` to run a more limited (but still useful) set of tests.
### Troubleshooting
-Upon encountering installation/testing errors, manually install the package in question using `conda install xxx` , and remove it from `.requirements/dev.txt`. After that, re-run `pip install -r requirements.txt` to continue.
+Upon encountering installation/testing errors, manually install the package in question using `conda install xxx`, then re-run `pip install .` to continue.
-### Known issues
-- Across all platforms, we are currently aware of `scope` dependency issues with Python 3.11.
+#### Known issues
+- If using GPU-accelerated period-finding algorithms for feature generation, you will need to install [periodfind](https://github.com/ZwickyTransientFacility/periodfind) separately from the source.
+- Across all platforms, we are currently aware of `scope` dependency issues with Python 3.12.
- Anaconda continues to cause problems with environment setup.
- Using `pip` to install `healpy` on an arm64 Mac can raise an error upon import. We recommend including `h5py` as a requirement during the creation of your `conda` environment.
- On Windows machines, `healpy` and `cesium` raise errors upon installation.
@@ -93,7 +111,7 @@ Upon encountering installation/testing errors, manually install the package in q
If the installation continues to raise errors, update the conda environment and try again.
-## How to contribute
+### How to contribute
Contributions to `scope` are made through [GitHub Pull Requests](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests), a set of proposed commits (or patches):
@@ -144,7 +162,7 @@ Developers may merge `main` into their branch as many times as they want to.
1. Once the pull request has been reviewed and approved by at least one team member, it will be merged into `scope`.
-## Contributing Field Guide sections
+### Contributing Field Guide sections
If you would like to contribute a Field Guide section, please follow the steps below.
diff --git a/doc/quickstart.md b/doc/quickstart.md
index 6fad4ae1..c3184e70 100644
--- a/doc/quickstart.md
+++ b/doc/quickstart.md
@@ -1,16 +1,18 @@
# Quick Start Guide
-This guide is intended to facilitate quick interactions with SCoPe code after you have completed the **Installation/Developer Guidelines** section. More detailed usage info can be found in the **Usage** section. **All of the following examples assume that SCoPe is installed in your home directory. If the `scope` directory is located elsewhere, adjust the example code as necessary.**
+This guide is intended to facilitate quick interactions with SCoPe code after you have completed the **Installation/Developer Guidelines** section. More detailed usage info can be found in the **Usage** section.
## Modify `config.yaml`
To start out, provide SCoPe your training set's filepath using the `training:` `dataset:` field in `config.yaml`. The path should be a partial one starting within the `scope` directory. For example, if your training set `trainingSet.parquet` is within the `tools` directory (which itself is within `scope`), provide `tools/trainingSet.parquet` in the `dataset:` field.
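+
+For example, the corresponding entry in `config.yaml` would look like the sketch below (other keys under `training:` omitted):
+
+```
+training:
+  dataset: tools/trainingSet.parquet
+```
+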
+When running scripts, `scope` will by default use the `config.yaml` file in your current directory. You can specify a different config file by providing its path to any installed script using the `--config-path` argument.
+
## Training
Train an XGBoost binary classifier using the following code:
```
-./scope.py train --tag=vnv --algorithm=xgb --group=ss23 --period_suffix=ELS_ECE_EAOV --epochs=30 --verbose --save --plot --skip_cv
+scope-train --tag vnv --algorithm xgb --group ss23 --period-suffix ELS_ECE_EAOV --epochs 30 --verbose --save --plot --skip-cv
```
### Arguments:
@@ -20,34 +22,34 @@ Train an XGBoost binary classifier using the following code:
`--group`: if `--save` is passed, training results are saved to the group/directory named here.
-`--period_suffix`: SCoPe determines light curve periods using GPU-accelerated algorithms. These algorithms include a Lomb-Scargle approach (ELS), Conditional Entropy (ECE), Analysis of Variance (AOV), and an approach nesting all three (ELS_ECE_EAOV). Periodic features are stored with the suffix specified here.
+`--period-suffix`: SCoPe determines light curve periods using GPU-accelerated algorithms. These algorithms include a Lomb-Scargle approach (ELS), Conditional Entropy (ECE), Analysis of Variance (AOV), and an approach nesting all three (ELS_ECE_EAOV). Periodic features are stored with the suffix specified here.
-`--min_count`: requires at least min_count positive examples to run training.
+`--min-count`: requires at least min_count positive examples to run training.
`--epochs`: neural network training takes an --epochs argument that is set to 30 here.
***Notes:***
-- *The above training runs the XGB algorithm by default and skips cross-validation in the interest of time. For a full run, you can remove the `--skip_cv` argument to run a cross-validated grid search of XGB hyperparameters during training.*
+- *The above training runs the XGB algorithm by default and skips cross-validation in the interest of time. For a full run, you can remove the `--skip-cv` argument to run a cross-validated grid search of XGB hyperparameters during training.*
-- *DNN hyperparameters are optimized using a different approach - Weights and Biases Sweeps (https://docs.wandb.ai/guides/sweeps). The results of these sweeps are the default hyperparameters in the config file. To run another round of sweeps for DNN, create a WandB account and set the `--run_sweeps` keyword in the call to `scope.py train`.*
+- *DNN hyperparameters are optimized using a different approach - Weights and Biases Sweeps (https://docs.wandb.ai/guides/sweeps). The results of these sweeps are the default hyperparameters in the config file. To run another round of sweeps for DNN, create a WandB account and set the `--run-sweeps` keyword in the call to `scope-train`.*
- *SCoPe DNN training does not provide feature importance information (due to the hidden layers of the network). Feature importance is possible to estimate for neural networks, but it is more computationally expensive compared to this "free" information from XGB.*
### Train multiple classifiers with one script
-Create a shell script that contains multiple calls to `scope.py train`:
+Create a shell script that contains multiple calls to `scope-train`:
```
-./scope.py create_training_script --filename=train_xgb.sh --min_count=1000 --algorithm=xgb --period_suffix=ELS_ECE_EAOV --add_keywords="--save --plot --group=ss23 --epochs=30 --skip_cv"
+create-training-script --filename train_xgb.sh --min-count 1000 --algorithm xgb --period-suffix ELS_ECE_EAOV --add-keywords "--save --plot --group ss23 --epochs 30 --skip-cv"
```
-Modify the permissions of this script by running `chmod +x train_xgb.sh`. Run the generated training script in a terminal window (using e.g. `./train_xgb.sh`) to train multiple label sequentially.
+Modify the permissions of this script by running `chmod +x train_xgb.sh`. Run the generated training script in a terminal window (using e.g. `./train_xgb.sh`) to train multiple classifiers sequentially.
***Note:***
-- *The code will throw an error if the training script filename already exists.*
+- *The code will raise an error if the training script filename already exists.*
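+
+The generated `train_xgb.sh` contains one `scope-train` call per class; a hypothetical excerpt (the class tags and keywords will reflect your config and `--add-keywords` string) looks like:
+```
+scope-train --tag vnv --algorithm xgb --period-suffix ELS_ECE_EAOV --save --plot --group ss23 --epochs 30 --skip-cv
+scope-train --tag rrlyr --algorithm xgb --period-suffix ELS_ECE_EAOV --save --plot --group ss23 --epochs 30 --skip-cv
+```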
### Running training on HPC resources
-`train_algorithm_slurm.py` and `train_algorithm_job_submission.py` can be used generate and submit `slurm` scripts to train all classifiers in parallel using HPC resources.
+`train-algorithm-slurm` and `train-algorithm-job-submission` can be used to generate and submit `slurm` scripts to train all classifiers in parallel using HPC resources.
## Plotting Classifier Performance
SCoPe saves diagnostic plots and json files to report each classifier's performance. The below code shows the location of the validation set results for one classifier.
@@ -82,10 +84,10 @@ This code may also be placed in a loop over multiple labels to compare each clas
## Inference
-Use `tools/inference.py` to run inference on a field (297) of features (within a directory called `generated_features`). The classifiers used for this inference are within the `ss23` directory/group specified during training.
+Use `run-inference` to run inference on a field (297) of features (in this example, located in a directory called `generated_features`). The classifiers used for this inference are within the `ss23` directory/group specified during training.
```
-./scope.py create_inference_script --filename=get_all_preds_xgb.sh --group_name=ss23 --algorithm=xgb --period_suffix=ELS_ECE_EAOV --feature_directory=generated_features
+create-inference-script --filename get_all_preds_xgb.sh --group-name ss23 --algorithm xgb --period-suffix ELS_ECE_EAOV --feature-directory generated_features
```
Modify the permissions of this script using `chmod +x get_all_preds_xgb.sh`, then run on the desired field:
@@ -94,12 +96,12 @@ Modify the permissions of this script using `chmod +x get_all_preds_xgb.sh`, the
```
***Notes:***
-- *`scope.py create_inference_script` will throw an error if the inference script filename already exists.*
+- *`create-inference-script` will raise an error if the inference script filename already exists.*
- *Inference begins by imputing missing features using the strategies specified in the `features:` section of the config file.*
### Running inference on HPC resources
-`run_inference_slurm.py` and `run_inference_job_submission.py` can be used generate and submit `slurm` scripts to run inference for all classifiers in parallel using HPC resources.*
+`run-inference-slurm` and `run-inference-job-submission` can be used to generate and submit `slurm` scripts to run inference for all classifiers in parallel using HPC resources.
## Examining predictions
diff --git a/doc/usage.md b/doc/usage.md
index ca920dcd..a143e68e 100644
--- a/doc/usage.md
+++ b/doc/usage.md
@@ -5,49 +5,49 @@
- Create HDF5 file for single CCD/quad pair in a field:
```sh
-./get_quad_ids.py --catalog ZTF_source_features_DR16 --field 301 --ccd 2 --quad 3 --minobs 20 --skip 0 --limit 10000
+get-quad-ids --catalog ZTF_source_features_DR16 --field 301 --ccd 2 --quad 3 --minobs 20 --skip 0 --limit 10000
```
- Create multiple HDF5 files for some CCD/quad pairs in a field:
```sh
-./get_quad_ids.py --catalog ZTF_source_features_DR16 --field 301 --multi-quads --ccd-range 1 8 --quad-range 2 4 --minobs 20 --limit 10000
+get-quad-ids --catalog ZTF_source_features_DR16 --field 301 --multi-quads --ccd-range 1 8 --quad-range 2 4 --minobs 20 --limit 10000
```
- Create multiple HDF5 files for all CCD/quad pairs in a field:
```sh
-./get_quad_ids.py --catalog ZTF_source_features_DR16 --field 301 --multi-quads --minobs 20 --limit 10000
+get-quad-ids --catalog ZTF_source_features_DR16 --field 301 --multi-quads --minobs 20 --limit 10000
```
- Create single HDF5 file for all sources in a field:
```sh
-./get_quad_ids.py --catalog ZTF_source_features_DR16 --field 301 --whole-field
+get-quad-ids --catalog ZTF_source_features_DR16 --field 301 --whole-field
```
## Download SCoPe features for ZTF fields/CCDs/quadrants
-- First, run `get_quad_ids.py` for desired fields/ccds/quads.
+- First, run `get-quad-ids` for desired fields/ccds/quads.
- Download features for all sources in a field:
```sh
-./tools/get_features.py --field 301 --whole-field
+get-features --field 301 --whole-field
```
- Download features for all sources in a field, imputing missing features using the strategies in `config.yaml`:
```sh
-./tools/get_features.py --field 301 --whole-field --impute-missing-features
+get-features --field 301 --whole-field --impute-missing-features
```
- Download features for a range of ccd/quads individually:
```sh
-./tools/get_features.py --field 301 --ccd-range 1 2 --quad-range 3 4
+get-features --field 301 --ccd-range 1 2 --quad-range 3 4
```
- Download features for a single pair of ccd/quad:
```sh
-./tools/get_features.py --field 301 --ccd-range 1 --quad-range 2
+get-features --field 301 --ccd-range 1 --quad-range 2
```
@@ -59,16 +59,16 @@ please refer to [arxiv:2102.11304](https://arxiv.org/pdf/2102.11304.pdf).
-- The training pipeline can be invoked with the `scope.py` utility. For example:
+- The training pipeline can be invoked with the `scope-train` command. For example:
```sh
-./scope.py train --tag=vnv --path_dataset=data/training/dataset.d15.csv --batch_size=64 --epochs=100 --verbose=1 --pre_trained_model=models/experiment/vnv/vnv.20221117_001502.h5
+scope-train --tag vnv --path-dataset data/training/dataset.d15.csv --batch-size 64 --epochs 100 --verbose 1 --pre-trained-model models/experiment/vnv/vnv.20221117_001502.h5
```
-Refer to `./scope.py train --help` for details.
+Refer to `scope-train --help` for details.
- All the necessary metadata/configuration could be defined in `config.yaml` under `training`,
-but could also be overridden with optional `scope.py train` arguments, e.g.
-`./scope.py train ... --batch_size=32 --threshold=0.6 ...`.
+but could also be overridden with optional `scope-train` arguments, e.g.
+`scope-train ... --batch-size 32 --threshold 0.6 ...`.
-- By default, the pipeline uses the `DNN` models defined in `scope/nn.py` using the tensorflow's `keras` functional API. SCoPe also supports an implementation of XGBoost (set `--algorithm=xgb`; see `scope/xgb.py`).
+- By default, the pipeline uses the `DNN` models defined in `scope/nn.py` with TensorFlow's `keras` functional API. SCoPe also supports an implementation of XGBoost (set `--algorithm xgb`; see `scope/xgb.py`).
- If `--save` is specified during `DNN` training, an HDF5 file of the model's layers and weights will be saved. This file can be directly used for additional training and inferencing. For `XGB`, a json file will save the model along with a `.params` file with the model parameters.
- The `Dataset` class defined in `scope.utils` hides the complexity of our dataset handling "under the rug".
- You can request access to a Google Drive folder containing the latest trained models [here](https://drive.google.com/drive/folders/1_oLBxveioKtw7LyMJfism745USe9tEGZ?usp=sharing).
@@ -77,47 +77,47 @@ but could also be overridden with optional `scope.py train` arguments, e.g.
These are referenced in `config.yaml` under `training.classes..features`.
- Feature stats to be used for feature scaling/standardization before training
- is defined in `config.yaml` under `feature_stats`.
+ are either computed by the code (default) or defined in `config.yaml` under `feature_stats`.
- We use [Weights & Biases](https://wandb.com) to track experiments.
Project details and access credentials can be defined in `config.yaml` under `wandb`.
-Initially, SCoPe used a `bash` script to train all classifier families:
+Initially, SCoPe used a `bash` script to train all classifier families, e.g.:
```sh
for class in pnp longt i fla ew eb ea e agn bis blyr ceph dscu lpv mir puls rrlyr rscvn srv wuma yso; \
do echo $class; \
for state in 1 2 3 4 5 6 7 8 9 42; \
- do ./scope.py train \
- --tag=$class --path_dataset=data/training/dataset.d15.csv \
- --scale_features=min_max --batch_size=64 \
- --epochs=300 --patience=30 --random_state=$state \
- --verbose=1 --gpu=1 --conv_branch=true --save; \
+ do scope-train \
+ --tag $class --path-dataset data/training/dataset.d15.csv \
+ --scale-features min_max --batch-size 64 \
+ --epochs 300 --patience 30 --random-state $state \
+ --verbose 1 --gpu 1 --conv-branch --save; \
done; \
done;
```
-Now, a training script containing one line per class to be trained can be generated by running `./scope.py create_training_script`, for example:
+Now, a training script containing one line per class to be trained can be generated by running `create-training-script`, for example:
```bash
-./scope.py create_training_script --filename='train_dnn.sh' --min_count=100 --pre_trained_group_name='experiment' --add_keywords='--save --batch_size=32 --group=new_experiment --period_suffix=ELS_ECE_EAOV'
+create-training-script --filename train_dnn.sh --min-count 100 --pre-trained-group-name experiment --add-keywords '--save --batch-size 32 --group new_experiment --period-suffix ELS_ECE_EAOV'
```
-A path to the training set may be provided as input to this method or otherwise taken from `config.yaml` (`training: dataset:`). To continue training on existing models, specify the `--pre_trained_group_name` keyword containing the models in `create_training_script`. If training on a feature collection containing multiple sets of periodic features (from different algorithms), set the suffix corresponding to the desired algorithm using `--period_suffix` or the `features: info: period_suffix:` field in the config file. The string specified in `--add_keywords` serves as a catch-all for additional keywords that the user wishes to be included in each line of the script.
+A path to the training set may be provided as input to this method or otherwise taken from `config.yaml` (`training: dataset:`). To continue training on existing models, specify the `--pre-trained-group-name` keyword containing the models in `create-training-script`. If training on a feature collection containing multiple sets of periodic features (from different algorithms), set the suffix corresponding to the desired algorithm using `--period-suffix` or the `features: info: period_suffix:` field in the config file. The string specified in `--add-keywords` serves as a catch-all for additional keywords that the user wishes to be included in each line of the script.
-If `--pre_trained_group_name` is specified and the `--train_all` keyword is set, the output script will train all classes specified in `config.yaml` regardless of whether they have a pre-trained model. If `--train_all` is not set (the default), the script will limit training to classes that have an existing trained model.
+If `--pre-trained-group-name` is specified and the `--train-all` keyword is set, the output script will train all classes specified in `config.yaml` regardless of whether they have a pre-trained model. If `--train-all` is not set (the default), the script will limit training to classes that have an existing trained model.
## Running inference
Running inference requires the following steps: download ids of a field, download (or generate) features for all downloaded ids, run inference for all available trained models, e.g:
```
-./tools/get_quad_ids.py --field= --whole_field
-./tools/get_features.py --field= --whole_field --impute_missing_features
+get-quad-ids --field <field_number> --whole-field
+get-features --field <field_number> --whole-field --impute-missing-features
```
OR
```
-./tools/generate_features.py --field --ccd --quad --doGPU
+generate-features --field <field_number> --ccd <ccd_number> --quad <quad_number> --doGPU
```
-The optimal way to run inference is through an inference script generated by running `./scope.py create_inference_script` with the appropriate arguments. After creating the script and adding the needed permissions (e.g. using `chmod +x`), the commands to run inference on the field `` are (in order):
+The optimal way to run inference is through an inference script generated by running `create-inference-script` with the appropriate arguments. After creating the script and adding the needed permissions (e.g. using `chmod +x`), the commands to run inference on the field `<field_number>` are (in order):
```
./get_all_preds.sh
```
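+
+For reference, an inference script like `get_all_preds.sh` can be generated beforehand along these lines (the group, algorithm and feature-directory names are placeholders mirroring the quickstart example):
+```
+create-inference-script --filename get_all_preds.sh --group-name ss23 --algorithm xgb --period-suffix ELS_ECE_EAOV --feature-directory generated_features
+```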
@@ -173,45 +173,44 @@ The fields associated with each key are `fritz_label` (containing the associated
```
## Generating features
-Code has been adapted from [ztfperiodic](https://github.com/mcoughlin/ztfperiodic) and other sources to calculate basic and Fourier stats for light curves along with other features. This allows new features to be generated with SCoPe, both locally and using GPU cluster resources. The feature generation script is contained within `tools/generate_features.py`.
+Code has been adapted from [ztfperiodic](https://github.com/mcoughlin/ztfperiodic) and other sources to calculate basic and Fourier stats for light curves along with other features. This allows new features to be generated with SCoPe, both locally and using GPU cluster resources. The feature generation script is run using the `generate-features` command.
Currently, the basic stats are calculated via `tools/featureGeneration/lcstats.py`, and a host of period-finding algorithms are available in `tools/featureGeneration/periodsearch.py`. Among the CPU-based period-finding algorithms, there is not yet support for `AOV_cython`. For the `AOV` algorithm to work, run `source build.sh` in the `tools/featureGeneration/pyaov/` directory, then copy the newly created `.so` file (`aov.cpython-310-darwin.so` or similar) to `lib/python3.10/site-packages/` or equivalent within your environment. The GPU-based algorithms require CUDA support (so Mac GPUs are not supported).
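+
+A sketch of the `AOV` build steps described above (the exact site-packages path depends on your environment and Python version):
+```sh
+cd tools/featureGeneration/pyaov
+source build.sh
+# copy the compiled extension into the active environment's site-packages
+cp aov.cpython-*.so "$(python -c 'import site; print(site.getsitepackages()[0])')"
+```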
inputs:
-1. --source_catalog* : name of Kowalski catalog containing ZTF sources (str)
-2. --alerts_catalog* : name of Kowalski catalog containing ZTF alerts (str)
-3. --gaia_catalog* : name of Kowalski catalog containing Gaia data (str)
-4. --bright_star_query_radius_arcsec : maximum angular distance from ZTF sources to query nearby bright stars in Gaia (float)
-5. --xmatch_radius_arcsec : maximum angular distance from ZTF sources to match external catalog sources (float)
-6. --kowalski_instances* : dictionary containing {names of Kowalski instances : authenticated penquins.Kowalski objects} (dict)
-7. --limit : maximum number of sources to process in batch queries / statistics calculations (int)
-8. --period_algorithms* : dictionary containing names of period algorithms to run. Normally specified in config - if specified here, should be a (list)
-9. --period_batch_size : maximum number of sources to simultaneously perform period finding (int)
-10. --doCPU : flag to run config-specified CPU period algorithms (bool)
-11. --doGPU : flag to run config-specified GPU period algorithms (bool)
-12. --samples_per_peak : number of samples per periodogram peak (int)
-13. --doScaleMinPeriod : for period finding, scale min period based on min_cadence_minutes (bool). Otherwise, set --max_freq to desired value
-14. --doRemoveTerrestrial : remove terrestrial frequencies from period-finding analysis (bool)
-15. --Ncore : number of CPU cores to parallelize queries (int)
-16. --field : ZTF field to run (int)
-17. --ccd : ZTF ccd to run (int)
-18. --quad : ZTF quadrant to run (int)
-19. --min_n_lc_points : minimum number of points required to generate features for a light curve (int)
-20. --min_cadence_minutes : minimum cadence between light curve points. Higher-cadence data are dropped except for the first point in the sequence (float)
-21. --dirname : name of generated feature directory (str)
-22. --filename : prefix of each feature filename (str)
-23. --doCesium : flag to compute config-specified cesium features in addition to default list (bool)
-24. --doNotSave : flag to avoid saving generated features (bool)
-25. --stop_early : flag to stop feature generation before entire quadrant is run. Pair with --limit to run small-scale tests (bool)
-26. --doQuadrantFile : flag to use a generated file containing [jobID, field, ccd, quad] columns instead of specifying --field, --ccd and --quad (bool)
-27. --quadrant_file : name of quadrant file in the generated_features/slurm directory or equivalent (str)
-28. --quadrant_index : number of job in quadrant file to run (int)
-29. --doSpecificIDs: flag to perform feature generation for ztf_id column in config-specified file (bool)
-30. --skipCloseSources: flag to skip removal of sources too close to bright stars via Gaia (bool)
-31. --top_n_periods: number of (E)LS, (E)CE periods to pass to (E)AOV if using (E)LS_(E)CE_(E)AOV algorithm (int)
-32. --max_freq: maximum frequency [1 / days] to use for period finding (float). Overridden by --doScaleMinPeriod
-33. --fg_dataset*: path to parquet, hdf5 or csv file containing specific sources for feature generation (str)
-34. --max_timestamp_hjd*: maximum timestamp of queried light curves, HJD (float)
+1. --source-catalog* : name of Kowalski catalog containing ZTF sources (str)
+2. --alerts-catalog* : name of Kowalski catalog containing ZTF alerts (str)
+3. --gaia-catalog* : name of Kowalski catalog containing Gaia data (str)
+4. --bright-star-query-radius-arcsec : maximum angular distance from ZTF sources to query nearby bright stars in Gaia (float)
+5. --xmatch-radius-arcsec : maximum angular distance from ZTF sources to match external catalog sources (float)
+6. --limit : maximum number of sources to process in batch queries / statistics calculations (int)
+7. --period-algorithms* : names of period algorithms to run. Normally specified in the config as a dictionary; if specified here, should be given as a list (list)
+8. --period-batch-size : maximum number of sources to simultaneously perform period finding (int)
+9. --doCPU : flag to run config-specified CPU period algorithms (bool)
+10. --doGPU : flag to run config-specified GPU period algorithms (bool)
+11. --samples-per-peak : number of samples per periodogram peak (int)
+12. --doScaleMinPeriod : for period finding, scale min period based on min-cadence-minutes (bool). Otherwise, set --max-freq to desired value
+13. --doRemoveTerrestrial : remove terrestrial frequencies from period-finding analysis (bool)
+14. --Ncore : number of CPU cores to parallelize queries (int)
+15. --field : ZTF field to run (int)
+16. --ccd : ZTF ccd to run (int)
+17. --quad : ZTF quadrant to run (int)
+18. --min-n-lc-points : minimum number of points required to generate features for a light curve (int)
+19. --min-cadence-minutes : minimum cadence between light curve points. Higher-cadence data are dropped except for the first point in the sequence (float)
+20. --dirname : name of generated feature directory (str)
+21. --filename : prefix of each feature filename (str)
+22. --doCesium : flag to compute config-specified cesium features in addition to default list (bool)
+23. --doNotSave : flag to avoid saving generated features (bool)
+24. --stop-early : flag to stop feature generation before entire quadrant is run. Pair with --limit to run small-scale tests (bool)
+25. --doQuadrantFile : flag to use a generated file containing [jobID, field, ccd, quad] columns instead of specifying --field, --ccd and --quad (bool)
+26. --quadrant-file : name of quadrant file in the generated_features/slurm directory or equivalent (str)
+27. --quadrant-index : number of job in quadrant file to run (int)
+28. --doSpecificIDs: flag to perform feature generation for ztf_id column in config-specified file (bool)
+29. --skipCloseSources: flag to skip removal of sources too close to bright stars via Gaia (bool)
+30. --top-n-periods: number of (E)LS, (E)CE periods to pass to (E)AOV if using (E)LS_(E)CE_(E)AOV algorithm (int)
+31. --max-freq: maximum frequency [1 / days] to use for period finding (float). Overridden by --doScaleMinPeriod
+32. --fg-dataset*: path to parquet, hdf5 or csv file containing specific sources for feature generation (str)
+33. --max-timestamp-hjd*: maximum timestamp of queried light curves, HJD (float)
output:
feature_df : dataframe containing generated features
@@ -222,7 +221,7 @@ feature_df : dataframe containing generated features
The following is an example of running the feature generation script locally:
```
-./generate_features.py --field 301 --ccd 2 --quad 4 --source_catalog ZTF_sources_20230109 --alerts_catalog ZTF_alerts --gaia_catalog Gaia_EDR3 --bright_star_query_radius_arcsec 300.0 --xmatch_radius_arcsec 2.0 --query_size_limit 10000 --period_batch_size 1000 --samples_per_peak 10 --Ncore 4 --min_n_lc_points 50 --min_cadence_minutes 30.0 --dirname generated_features --filename gen_features --doCPU --doRemoveTerrestrial --doCesium
+generate-features --field 301 --ccd 2 --quad 4 --source-catalog ZTF_sources_20230109 --alerts-catalog ZTF_alerts --gaia-catalog Gaia_EDR3 --bright-star-query-radius-arcsec 300.0 --xmatch-radius-arcsec 2.0 --query-size-limit 10000 --period-batch-size 1000 --samples-per-peak 10 --Ncore 4 --min-n-lc-points 50 --min-cadence-minutes 30.0 --dirname generated_features --filename gen_features --doCPU --doRemoveTerrestrial --doCesium
```
Setting `--doCPU` will run the config-specified CPU period algorithms on each source. Setting `--doGPU` instead will do likewise with the specified GPU algorithms. If neither of these keywords is set, the code will assign a value of `1.0` to each period and compute Fourier statistics using that number.
@@ -230,34 +229,36 @@ Setting `--doCPU` will run the config-specified CPU period algorithms on each so
-Below is an example run the script using a job/quadrant file (containing [job id, field, ccd, quad] columns) instead of specifying field/ccd/quad directly:
+Below is an example of running the script using a job/quadrant file (containing [job id, field, ccd, quad] columns) instead of specifying field/ccd/quad directly:
```
-/home/bhealy/scope/tools/generate_features.py --source_catalog ZTF_sources_20230109 --alerts_catalog ZTF_alerts --gaia_catalog Gaia_EDR3 --bright_star_query_radius_arcsec 300.0 --xmatch_radius_arcsec 2.0 --query_size_limit 10000 --period_batch_size 1000 --samples_per_peak 10 --Ncore 20 --min_n_lc_points 50 --min_cadence_minutes 30.0 --dirname generated_features_DR15 --filename gen_features --doGPU --doRemoveTerrestrial --doCesium --doQuadrantFile --quadrant_file slurm.dat --quadrant_index 5738
+generate-features --source-catalog ZTF_sources_20230109 --alerts-catalog ZTF_alerts --gaia-catalog Gaia_EDR3 --bright-star-query-radius-arcsec 300.0 --xmatch-radius-arcsec 2.0 --query-size-limit 10000 --period-batch-size 1000 --samples-per-peak 10 --Ncore 20 --min-n-lc-points 50 --min-cadence-minutes 30.0 --dirname generated_features_DR15 --filename gen_features --doGPU --doRemoveTerrestrial --doCesium --doQuadrantFile --quadrant-file slurm.dat --quadrant-index 5738
```
### Slurm scripts
-For large-scale feature generation, `generate_features.py` is intended to be run on a high-performance computing cluster. Often these clusters require jobs to be submitted using a utility like `slurm` (Simple Linux Utility for Resource Management) to generate scripts. These scripts contain information about the type, amount and duration of computing resources to allocate to the user.
+For large-scale feature generation, `generate-features` is intended to be run on a high-performance computing cluster. These clusters often require jobs to be submitted using a utility like `slurm` (Simple Linux Utility for Resource Management). The resulting `slurm` scripts contain information about the type, amount and duration of computing resources to allocate to the user.
-Scope's `generate_features_slurm.py` code creates two slurm scripts: (1) runs single instance of `generate_features.py`, and (2) runs the `generate_features_job_submission.py` which submits multiple jobs in parallel, periodically checking to see if additional jobs can be started. See below for more information about these components of feature generation.
+SCoPe's `generate-features-slurm` code creates two slurm scripts: the first runs a single instance of `generate-features`, and the second runs `generate-features-job-submission`, which submits multiple jobs in parallel, periodically checking whether additional jobs can be started. See below for more information about these components of feature generation.
-`generate_features_slurm.py` can receive all of the arguments used by `generate_features.py`. These arguments are passed to the instances of feature generation begun by running slurm script (1). There are also additional arguments specific to cluster resource management:
+`generate-features-slurm` can receive all of the arguments used by `generate-features`. These arguments are passed to the instances of feature generation begun by running slurm script (1). There are also additional arguments specific to cluster resource management:
inputs:
-1. --job_name : name of submitted jobs (str)
-2. --cluster_name : name of HPC cluster (str)
-3. --partition_type : cluster partition to use (str)
+1. --job-name : name of submitted jobs (str)
+2. --cluster-name : name of HPC cluster (str)
+3. --partition-type : cluster partition to use (str)
4. --nodes : number of nodes to request (int)
5. --gpus : number of GPUs to request (int)
-6. --memory_GB : amount of memory to request in GB (int)
-7. --time : amount of time before instance times out (str)
-8. --mail_user: user's email address for job updates (str)
-9. --account_name : name of account having HPC allocation (str)
-10. --python_env_name : name of Python environment to activate before running `generate_features.py` (str)
-11. --kowalski_instance_name : name of Kowalski instance containing ZTF source catalog (str)
+6. --memory-GB : amount of memory to request in GB (int)
+7. --submit-memory-GB : Memory allocation to request for job submission (int)
+8. --time : amount of time before instance times out (str)
+9. --mail-user: user's email address for job updates (str)
+10. --account-name : name of account having HPC allocation (str)
+11. --python-env-name : name of Python environment to activate before running `generate-features` (str)
12. --generateQuadrantFile : flag to map fields/ccds/quads containing sources to job numbers, save file (bool)
-13. --max_instances : maximum number of HPC instances to run in parallel (int)
-14. --wait_time_minutes : amount of time to wait between status checks in minutes (float)
-15. --doSubmitLoop : flag to run loop initiating instances until out of jobs (hard on Kowalski)
-16. --runParallel : flag to run jobs in parallel using slurm [recommended]. Otherwise, run in series on a single instance
-17. --user : if using slurm, your username. This will be used to periodically run `squeue` and list your running jobs (str)
+13. --field-list : space-separated list of fields for which to generate quadrant file. If None, all populated fields included (int)
+14. --max-instances : maximum number of HPC instances to run in parallel (int)
+15. --wait-time-minutes : amount of time to wait between status checks in minutes (float)
+16. --doSubmitLoop : flag to run loop initiating instances until out of jobs (hard on Kowalski)
+17. --runParallel : flag to run jobs in parallel using slurm [recommended]. Otherwise, run in series on a single instance
+18. --user : if using slurm, your username. This will be used to periodically run `squeue` and list your running jobs (str)
+19. --submit-interval-minutes : Time to wait between job submissions, minutes (float)
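+
+A hypothetical invocation combining these resource arguments (cluster-specific values are placeholders; the usual `generate-features` arguments may be appended as well):
+```
+generate-features-slurm --job-name gen_features --cluster-name <cluster> --partition-type <partition> --nodes 1 --gpus 1 --memory-GB 64 --time 48:00:00 --mail-user <email> --account-name <account> --python-env-name scope-env --max-instances 20 --wait-time-minutes 5 --runParallel --user <username> --doGPU
+```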
## Feature definitions
### Selected phenomenological feature definitions
@@ -265,21 +266,21 @@ inputs:
| name | definition |
| ---- | ---------- |
|ad | Anderson-Darling statistic |
-|chi2red | Reduced chi^2 |
-|f1_BIC | Bayesian information criterion, first order (Fourier analysis) |
-|f1_a | a coefficient, first order (Fourier analysis) |
-|f1_amp | Amplitude, first order (Fourier analysis) |
-|f1_b | b coefficient (Fourier analysis) |
-|f1_phi0 | Zero-phase, first order (Fourier analysis) |
-|f1_power | Power, first order (Fourier analysis) |
-|f1_relamp1 | Relative amplitude, first order (Fourier analysis) |
-|f1_relamp2 | Relative amplitude, second order (Fourier analysis) |
-|f1_relamp3 | Relative amplitude, third order (Fourier analysis) |
-|f1_relamp4 | Relative amplitude, fourth order (Fourier analysis) |
-|f1_relphi1 | Relative phase, first order (Fourier analysis) |
-|f1_relphi2 | Relative phase, second order (Fourier analysis) |
-|f1_relphi3 | Relative phase, third order (Fourier analysis) |
-|f1_relphi4 | Relative phase, fourth order (Fourier analysis) |
+|chi2red | Reduced chi^2 after mean subtraction |
+|f1_BIC | Bayesian information criterion of best-fitting series (Fourier analysis) |
+|f1_a | a coefficient of best-fitting series (Fourier analysis) |
+|f1_amp | Amplitude of best-fitting series (Fourier analysis) |
+|f1_b | b coefficient of best-fitting series (Fourier analysis) |
+|f1_phi0 | Zero-phase of best-fitting series (Fourier analysis) |
+|f1_power | Normalized chi^2 of best-fitting series (Fourier analysis) |
+|f1_relamp1 | Relative amplitude, first harmonic (Fourier analysis) |
+|f1_relamp2 | Relative amplitude, second harmonic (Fourier analysis) |
+|f1_relamp3 | Relative amplitude, third harmonic (Fourier analysis) |
+|f1_relamp4 | Relative amplitude, fourth harmonic (Fourier analysis) |
+|f1_relphi1 | Relative phase, first harmonic (Fourier analysis) |
+|f1_relphi2 | Relative phase, second harmonic (Fourier analysis) |
+|f1_relphi3 | Relative phase, third harmonic (Fourier analysis) |
+|f1_relphi4 | Relative phase, fourth harmonic (Fourier analysis) |
|i60r | Mag ratio between 20th, 80th percentiles |
|i70r | Mag ratio between 15th, 85th percentiles |
|i80r | Mag ratio between 10th, 90th percentiles |
@@ -315,6 +316,7 @@ inputs:
| AllWISE_w3mpro | AllWISE W3 mag |
| AllWISE_w4mpro | AllWISE W4 mag |
| Gaia_EDR3__parallax | Gaia parallax |
+| Gaia_EDR3__parallax_error | Gaia parallax error |
| Gaia_EDR3__phot_bp_mean_mag | Gaia BP mag |
| Gaia_EDR3__phot_bp_rp_excess_factor | Gaia BP-RP excess factor |
| Gaia_EDR3__phot_g_mean_mag | Gaia G mag |
@@ -340,12 +342,12 @@ It is useful to know the classifications of any persistent ZTF sources that are
To set up a `cron` job, first run `EDITOR=emacs crontab -e`. You can replace `emacs` with your text editor of choice as long as it is installed on your machine. This command will open a text file in which to place `cron` commands. An example command is as follows:
```bash
-0 */2 * * * ~/scope/gcn_cronjob.py > ~/scope/log_gcn_cronjob.txt 2>&1
+0 */2 * * * cd scope && ~/miniforge3/envs/scope-env/bin/python ~/scope/gcn_cronjob.py > ~/scope/log_gcn_cronjob.txt 2>&1
```
Above, the `0 */2 * * *` means that this command will run every two hours, on minute 0 of that hour. Time increments increase from left to right; in this example, the five numbers are minute, hour, day (of month), month, day (of week). The `*/2` means that the hour has to be divisible by 2 for the job to run. Check out [crontab.guru](https://crontab.guru) to learn more about `cron` timing syntax.
-Next in the line, `~/scope/gcn_cronjob.py` is the command that gets run. The `>` character forwards the output from the command (e.g. what your script prints) into a log file in a specific location (here `~/scope/log_gcn_cronjob.txt`). Finally, the `2>&1` suppresses 'emails' from `cron` about the status of your job (unnecessary since the log is being saved to the user-specified file).
+Next in the line, we change directories to `scope` in order for the code to access our `config.yaml` file located in this directory. Then, `~/miniforge3/envs/scope-env/bin/python ~/scope/gcn_cronjob.py` is the command that gets run (using the Python environment installed in `scope-env`). The `>` character forwards the output from the command (e.g. what your script prints) into a log file in a specific location (here `~/scope/log_gcn_cronjob.txt`). Finally, the `2>&1` suppresses 'emails' from `cron` about the status of your job (unnecessary since the log is being saved to the user-specified file).
Save the text file once you finish modifying it to install the cron job. **Ensure that the last line of your file is a newline to avoid issues when running.** Your computer may pop up a window to which you should respond in the affirmative in order to successfully initialize the job. To check which `cron` jobs have been installed, run `crontab -l`. To uninstall your jobs, run `crontab -r`.
@@ -355,18 +357,18 @@ Because `cron` runs in a simple environment, the usual details of environment se
```
PYTHONPATH = /Users/username/scope
-0 */2 * * * /opt/homebrew/bin/gtimeout 2h ~/miniforge3/envs/scope-env/bin/python ~/scope/gcn_cronjob.py > ~/scope/log_gcn_cronjob.txt 2>&1
+0 */2 * * * /opt/homebrew/bin/gtimeout 2h ~/miniforge3/envs/scope-env/bin/python scope-gcn-cronjob > ~/scope/log_gcn_cronjob.txt 2>&1
```
In the first line above, the `PYTHONPATH` environment variable is defined to include the `scope` directory. Without this line, any code that imports from `scope` will throw an error, since the user's usual `PYTHONPATH` variable is not accessed in the `cron` environment.
-The second line begins with the familiar `cron` timing pattern described above. It continues by specifying the a maximum runtime of 2 hours before timing out using the `gtimeout` command. On a Mac, this can be installed with `homebrew` by running `brew install coreutils`. Note that the full path to `gtimeout` must be specified. After the timeout comes the call to the `gcn_cronjob.py` script. Note that the usual `#/usr/bin/env python` line at the top of SCoPe's python scripts does not work within the `cron` environment. Instead, `python` must be explicitly specified, and in order to have access to the modules installed in `scope-env` we must provide a full path like the one above (`~/miniforge3/envs/scope-env/bin/python`). The line concludes by sending the script's output to a dedicated log file. This file gets overwritten each time the script runs.
+The second line begins with the familiar `cron` timing pattern described above. It continues by specifying a maximum runtime of 2 hours before timing out using the `gtimeout` command. On a Mac, this can be installed with `homebrew` by running `brew install coreutils`. Note that the full path to `gtimeout` must be specified. After the timeout comes the call to the `gcn_cronjob.py` script. Note that the usual `#!/usr/bin/env python` line at the top of SCoPe's python scripts does not work within the `cron` environment. Instead, `python` must be explicitly specified, and in order to have access to the modules and scripts installed in `scope-env` we must provide a full path like the one above (`~/miniforge3/envs/scope-env/bin/python`). The line concludes by sending the script's output to a dedicated log file. This file gets overwritten each time the script runs.
### Check if `cron` job is running
It can be useful to know whether the script within a cron job is currently running. One way to do this for `gcn_cronjob.py` is to run the command `ps aux | grep gcn_cronjob.py`. This will always return one item (representing the command you just ran), but if the script is currently running you will see more than one item.
## Local feature generation/inference
-SCoPe contains a script that runs local feature generation and inference on sources specified in an input file. Example input files are contained within the `tools` directory (`local_scope_radec.csv` and `local_scope_ztfid.csv`). After receiving either ra/dec coordinates or ZTF light curve IDs (plus an object ID for each entry), the `run_scope_local.py` script will generate features and run inference using existing trained models, saving the results to timestamped directories. This script accepts most arguments from `generate_features.py` and `inference.py`. Additional inputs specific to this script are listed below.
+SCoPe contains a script that runs local feature generation and inference on sources specified in an input file. Example input files are contained within the `tools` directory (`local_scope_radec.csv` and `local_scope_ztfid.csv`). After receiving either ra/dec coordinates or ZTF light curve IDs (plus an object ID for each entry), the `run-scope-local` script will generate features and run inference using existing trained models, saving the results to timestamped directories. This script accepts most arguments from `generate-features` and `scope-inference`. Additional inputs specific to this script are listed below.
inputs:
1. --path-dataset : path (from base scope directory or fully qualified) to parquet, hdf5 or csv file containing specific sources (str)
@@ -380,28 +382,28 @@ current_dt : formatted datetime string used to label output directories
### Example usage
```
-./run_scope_local.py --path-dataset tools/local_scope_ztfid.csv --doCPU --doRemoveTerrestrial --scale_features min_max --group-names DR16_stats nobalance_DR16_DNN_stats --algorithms xgb
+run-scope-local --path-dataset tools/local_scope_ztfid.csv --doCPU --doRemoveTerrestrial --scale-features min_max --group-names DR16_stats nobalance_DR16_DNN_stats --algorithms xgb
-./run_scope_local.py --path-dataset tools/local_scope_radec.csv --doCPU --write_csv --doRemoveTerrestrial --group-names DR16_stats nobalance_DR16_DNN_stats --algorithms xgb dnn
+run-scope-local --path-dataset tools/local_scope_radec.csv --doCPU --write-csv --doRemoveTerrestrial --group-names DR16_stats nobalance_DR16_DNN_stats --algorithms xgb dnn
```
-## Scope Download Classification
+## scope-download-classification
inputs:
1. --file : CSV file containing obj_id and/or ra dec coordinates. Set to "parse" to download sources by group id.
-2. --group_ids : target group id(s) on Fritz for download (if CSV file not provided)
+2. --group-ids : target group id(s) on Fritz for download, space-separated (if CSV file not provided)
3. --start : Index or page number (if in "parse" mode) to begin downloading (optional)
-4. --merge_features : Flag to merge features from Kowalski with downloaded sources
-5. --features_catalog : Name of features catalog to query
-6. --features_limit : Limit on number of sources to query at once
-7. --taxonomy_map : Filename of taxonomy mapper (JSON format)
-8. --output_dir : Name of directory to save downloaded files
-9. --output_filename : Name of file containing merged classifications and features
-10. --output_format : Output format of saved files, if not specified in (9). Must be one of parquet, h5, or csv.
-11. --get_ztf_filters : Flag to add ZTF filter IDs (separate catalog query) to default features
-12. --impute_missing_features : Flag to impute missing features using scope.utils.impute_features
-13. --update_training_set : if downloading an active learning sample, update the training set with the new classification based on votes
-14. --updated_training_set_prefix : Prefix to add to updated training set file
-15. --min_vote_diff : Minimum number of net votes (upvotes - downvotes) to keep an active learning classification. Caution: if zero, all classifications of reviewed sources will be added
+4. --merge-features : Flag to merge features from Kowalski with downloaded sources
+5. --features-catalog : Name of features catalog to query
+6. --features-limit : Limit on number of sources to query at once
+7. --taxonomy-map : Filename of taxonomy mapper (JSON format)
+8. --output-dir : Name of directory to save downloaded files
+9. --output-filename : Name of file containing merged classifications and features
+10. --output-format : Output format of saved files, if not specified in (9). Must be one of parquet, h5, or csv.
+11. --get-ztf-filters : Flag to add ZTF filter IDs (separate catalog query) to default features
+12. --impute-missing-features : Flag to impute missing features using scope.utils.impute_features
+13. --update-training-set : if downloading an active learning sample, update the training set with the new classification based on votes
+14. --updated-training-set-prefix : Prefix to add to updated training set file
+15. --min-vote-diff : Minimum number of net votes (upvotes - downvotes) to keep an active learning classification. Caution: if zero, all classifications of reviewed sources will be added
process:
1. if CSV file provided, query by object ids or ra, dec
@@ -411,60 +413,60 @@ process:
5. if merge_features, query Kowalski and merge sources with features, saving new CSV file
6. Fritz sources with multiple associated ZTF IDs will generate multiple rows in the merged feature file
7. To skip the source download part of the code, provide an input CSV file containing columns named 'obj_id', 'classification', 'probability', 'period_origin', 'period', 'ztf_id_origin', and 'ztf_id'.
-8. Set `--update_training_set` to read the config-specified training set and merge new sources/classifications from an active learning group
+8. Set `--update-training-set` to read the config-specified training set and merge new sources/classifications from an active learning group
output: data with new columns appended.
```sh
-./scope_download_classification.py --file sample.csv --group_ids 360 361 --start 10 --merge_features True --features_catalog ZTF_source_features_DR16 --features_limit 5000 --taxonomy_map golden_dataset_mapper.json --output_dir fritzDownload --output_filename merged_classifications_features --output_format parquet -get_ztf_filters --impute_missing_features
+scope-download-classification --file sample.csv --group-ids 360 361 --start 10 --merge-features True --features-catalog ZTF_source_features_DR16 --features-limit 5000 --taxonomy-map golden_dataset_mapper.json --output-dir fritzDownload --output-filename merged_classifications_features --output-format parquet --get-ztf-filters --impute-missing-features
```
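An illustrative active-learning download combining the vote-based options above (the group id, prefix, and vote threshold here are placeholders, not recommendations):
```sh
scope-download-classification --file sample.csv --group-ids 1544 --merge-features True --features-catalog ZTF_source_features_DR16 --taxonomy-map golden_dataset_mapper.json --output-format parquet --update-training-set --updated-training-set-prefix AL_ --min-vote-diff 1
```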
-## Scope Download GCN Sources
+## scope-download-gcn-sources
inputs:
1. --dateobs: unique dateObs of GCN event (str)
-2. --group_ids: group ids to query sources [all if not specified] (list)
-3. --days_range: max days past event to search for sources (float)
-4. --radius_arcsec: radius [arcsec] around new sources to search for existing ZTF sources (float)
-5. --save_filename: filename to save source ids/coordinates (str)
+2. --group-ids: group ids to query sources, space-separated [all if not specified] (list)
+3. --days-range: max days past event to search for sources (float)
+4. --radius-arcsec: radius [arcsec] around new sources to search for existing ZTF sources (float)
+5. --save-filename: filename to save source ids/coordinates (str)
process:
1. query all sources associated with GCN event
2. get fritz names, ras and decs for each page of sources
-3. save json file in a useful format to use with `generate_features.py --doSpecificIDs`
+3. save json file in a useful format to use with `generate-features --doSpecificIDs`
```sh
-./scope_download_gcn_sources.py --dateobs 2023-05-21T05:30:43
+scope-download-gcn-sources --dateobs 2023-05-21T05:30:43
```
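A fuller invocation sketch using the options above (the values shown are placeholders, not defaults):
```sh
scope-download-gcn-sources --dateobs 2023-05-21T05:30:43 --group-ids 1544 --days-range 7.0 --radius-arcsec 0.5 --save-filename fritzDownload/specific_ids_GCN_sources
```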
-## Scope Upload Classification
+## scope-upload-classification
inputs:
1. --file : path to CSV, HDF5 or Parquet file containing ra, dec, period, and labels
-2. --group_ids : target group id(s) on Fritz for upload
+2. --group-ids : target group id(s) on Fritz for upload, space-separated
3. --classification : Name(s) of input file columns containing classification probabilities (one column per label). Set this to "read" to automatically upload all classes specified in the taxonomy mapper at once.
-4. --taxonomy_map : Filename of taxonomy mapper (JSON format)
+4. --taxonomy-map : Filename of taxonomy mapper (JSON format)
5. --comment : Comment to post (if specified)
6. --start : Index to start uploading (zero-based)
7. --stop : Index to stop uploading (inclusive)
-8. --classification_origin: origin of classifications. If 'SCoPe' (default), Fritz will apply custom color-coding
-9. --skip_phot : flag to skip photometry upload (skips for existing sources only)
-10. --post_survey_id : flag to post an annotation for the Gaia, AllWISE or PS1 id associated with each source
-11. --survey_id_origin : Annotation origin name for survey_id
-12. --p_threshold : Probability threshold for posted classification (values must be >= than this number to post)
-13. --match_ids : flag to match input and existing survey_id values during upload. It is recommended to instead match obj_ids (see next line)
-14. --use_existing_obj_id : flag to use existing source names in a column named 'obj_id' (a coordinate-based ID is otherwise generated by default)
-15. --post_upvote : flag to post an upvote to newly uploaded classifications. Not recommended when posting automated classifications for active learning.
-16. --check_labelled_box : flag to check the 'labelled' box for each source when uploading classifications. Not recommended when posting automated classifications for active learning.
-17. --write_obj_id : flag to output a copy of the input file with an 'obj_id' column containing the coordinate-based IDs for each posted object. Use this file as input for future uploads to add to this column.
-18. --result_dir : name of directory where upload results file is saved. Default is 'fritzUpload' within the tools directory.
-19. --result_filetag: name of tag appended to the result filename. Default is 'fritzUpload'.
-20. --result_format : result file format; one of csv, h5 or parquet. Default is parquet.
-21. --replace_classifications : flag to delete each source's existing classifications before posting new ones.
-22. --radius_arcsec: photometry search radius for uploaded sources.
-23. --no_ml: flag to post classifications that do not originate from an ML classifier.
-24. --post_phot_as_comment: flag to post photometry as a comment on the source (bool)
-25. --post_phasefolded_phot: flag to post phase-folded photometry as comment in addition to time series (bool)
-26. --phot_dirname: name of directory in which to save photometry plots (str)
-27. --instrument_name: name of instrument used for observations (str)
+8. --classification-origin: origin of classifications. If 'SCoPe' (default), Fritz will apply custom color-coding
+9. --skip-phot : flag to skip photometry upload (skips for existing sources only)
+10. --post-survey-id : flag to post an annotation for the Gaia, AllWISE or PS1 id associated with each source
+11. --survey-id-origin : Annotation origin name for survey_id
+12. --p-threshold : Probability threshold for posted classification (values must be >= than this number to post)
+13. --match-ids : flag to match input and existing survey_id values during upload. It is recommended to instead match obj_ids (see next line)
+14. --use-existing-obj-id : flag to use existing source names in a column named 'obj_id' (a coordinate-based ID is otherwise generated by default)
+15. --post-upvote : flag to post an upvote to newly uploaded classifications. Not recommended when posting automated classifications for active learning.
+16. --check-labelled-box : flag to check the 'labelled' box for each source when uploading classifications. Not recommended when posting automated classifications for active learning.
+17. --write-obj-id : flag to output a copy of the input file with an 'obj_id' column containing the coordinate-based IDs for each posted object. Use this file as input for future uploads to add to this column.
+18. --result-dir : name of directory where upload results file is saved. Default is 'fritzUpload' within the tools directory.
+19. --result-filetag: name of tag appended to the result filename. Default is 'fritzUpload'.
+20. --result-format : result file format; one of csv, h5 or parquet. Default is parquet.
+21. --replace-classifications : flag to delete each source's existing classifications before posting new ones.
+22. --radius-arcsec: photometry search radius for uploaded sources.
+23. --no-ml: flag to post classifications that do not originate from an ML classifier.
+24. --post-phot-as-comment: flag to post photometry as a comment on the source (bool)
+25. --post-phasefolded-phot: flag to post phase-folded photometry as comment in addition to time series (bool)
+26. --phot-dirname: name of directory in which to save photometry plots (str)
+27. --instrument-name: name of instrument used for observations (str)
process:
0. include Kowalski host, port, protocol, and token or username+password in config.yaml
@@ -476,14 +478,14 @@ process:
6. (post comment to each uploaded source)
```sh
-./scope_upload_classification.py --file sample.csv --group_ids 500 250 750 --classification variable flaring --taxonomy_map map.json --comment confident --start 35 --stop 50 --skip_phot --p_threshold 0.9 --write_obj_id --result_format csv --use_existing_obj_id --post_survey_id --replace_classifications
+scope-upload-classification --file sample.csv --group-ids 500 250 750 --classification variable flaring --taxonomy-map map.json --comment confident --start 35 --stop 50 --skip-phot --p-threshold 0.9 --write-obj-id --result-format csv --use-existing-obj-id --post-survey-id --replace-classifications
```
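As a further sketch (the file name and group id are placeholders), an automated-style upload that skips photometry posting but attaches light curves as comments, without upvoting or checking the labelled box:
```sh
scope-upload-classification --file merged_GCN_sources.parquet --group-ids 1544 --classification read --taxonomy-map fritz_mapper.json --use-existing-obj-id --skip-phot --p-threshold 0.7 --radius-arcsec 0.5 --post-phot-as-comment --post-phasefolded-phot
```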
-## Scope Manage Annotation
+## scope-manage-annotation
inputs:
1. --action : one of "post", "update", or "delete"
2. --source : ZTF ID or path to .csv file with multiple objects (ID column "obj_id")
-3. --target : group id(s) on Fritz
+3. --group-ids : target group id(s) on Fritz, space-separated
4. --origin : name of annotation
5. --key : name of annotation
6. --value : value of annotation (required for "post" and "update" - if source is a .csv file, value will auto-populate from `source[key]`)
@@ -494,10 +496,10 @@ process:
3. confirm changes with printed messages
```sh
-./scope_manage_annotation.py --action post --source sample.csv --group_ids 200 300 400 --origin revisedperiod --key period
+scope-manage-annotation --action post --source sample.csv --group-ids 200 300 400 --origin revisedperiod --key period
```
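A single-source sketch (the ZTF ID and value are placeholders) that updates an existing annotation with the same options:
```sh
scope-manage-annotation --action update --source ZTF18aaaaaaa --group-ids 200 --origin revisedperiod --key period --value 1.2345
```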
-## Scope Upload Disagreements
+## Scope Upload Disagreements (deprecated)
inputs:
1. dataset
2. group id on Fritz
diff --git a/gcn_cronjob.py b/gcn_cronjob.py
index 3c89cb69..29ed47f6 100755
--- a/gcn_cronjob.py
+++ b/gcn_cronjob.py
@@ -5,21 +5,20 @@
from datetime import datetime, timedelta
import argparse
import pathlib
-import yaml
from tools.scope_download_gcn_sources import download_gcn_sources
import os
-from scope.utils import read_parquet
+from scope.utils import read_parquet, parse_load_config
import numpy as np
import warnings
import json
+from scope.scope_class import Scope
+from tools.combine_preds import combine_preds
+from tools.scope_upload_classification import upload_classification
-BASE_DIR = pathlib.Path(__file__).parent.absolute()
NUM_PER_PAGE = 100
-
-config_path = BASE_DIR / "config.yaml"
-with open(config_path) as config_yaml:
- config = yaml.load(config_yaml, Loader=yaml.FullLoader)
+BASE_DIR = pathlib.Path.cwd()
+config = parse_load_config()
def query_gcn_events(
@@ -28,24 +27,23 @@ def query_gcn_events(
post_group_ids: list = [1544],
days_range: float = 7.0,
radius_arcsec: float = 0.5,
- save_filename: str = 'tools/fritzDownload/specific_ids_GCN_sources',
+ save_filename: str = 'fritzDownload/specific_ids_GCN_sources',
taxonomy_map: str = 'tools/fritz_mapper.json',
combined_preds_dirname: str = 'GCN_dnn_xgb',
dateobs: str = None,
p_threshold: float = 0.7,
username: str = 'bhealy',
- generated_features_dirname: str = 'generated_features_gcn_sources',
+ generated_features_dirname: str = 'generated_features_GCN_sources',
partition: str = 'gpu-debug',
doNotPost: bool = False,
agg_method: str = 'mean',
dnn_preds_directory: str = 'GCN_dnn',
xgb_preds_directory: str = 'GCN_xgb',
- path_to_python: str = '~/miniforge3/envs/scope-env/bin/python',
checkpoint_filename: str = 'gcn_sources_checkpoint.json',
checkpoint_refresh_days: float = 180.0,
ignore_checkpoint: bool = False,
):
-
+ scope = Scope()
currentDate = datetime.utcnow()
current_dt = currentDate.strftime("%Y-%m-%dT%H:%M:%S")
@@ -133,7 +131,6 @@ def query_gcn_events(
# EM+GW group on Fritz
post_group_ids.append(1544)
- post_group_ids_str = "".join([f"{x} " for x in post_group_ids]).strip()
print(f'Running for event {dateobs}...')
# Colons can confuse the file system; replace them for saving
@@ -141,8 +138,7 @@ def query_gcn_events(
# Check for existing sources file
filepath = (
- BASE_DIR
- / f'tools/fritzDownload/specific_ids_GCN_sources.{save_dateobs}.parquet'
+ BASE_DIR / f'fritzDownload/specific_ids_GCN_sources.{save_dateobs}.parquet'
)
if filepath.exists():
existing_sources = read_parquet(filepath)
@@ -251,34 +247,77 @@ def query_gcn_events(
print(
"Consolidating DNN and XGB classification results for Fritz..."
)
- os.system(
- f"{path_to_python} {BASE_DIR}/scope.py select_fritz_sample --fields='{save_dateobs}_specific_ids' --group='DR16' --algorithm='xgb' \
- --probability_threshold=0 --consol_filename='inference_results_{save_dateobs}' --al_directory='GCN' \
- --al_filename='GCN_sources_{save_dateobs}' --write_consolidation_results --select_top_n --doAllSources --write_csv"
- )
- os.system(
- f"{path_to_python} {BASE_DIR}/scope.py select_fritz_sample --fields='{save_dateobs}_specific_ids' --group='nobalance_DR16_DNN' --algorithm='dnn' \
- --probability_threshold=0 --consol_filename='inference_results_{save_dateobs}' --al_directory='GCN' \
- --al_filename='GCN_sources_{save_dateobs}' --write_consolidation_results --select_top_n --doAllSources --write_csv"
- )
+ try:
+ generator = scope.select_fritz_sample(
+ fields=[f"{save_dateobs}_specific_ids"],
+ group="DR16_importance",
+ algorithm="xgb",
+ probability_threshold=0.0,
+ consol_filename=f"inference_results_{save_dateobs}",
+ al_directory="GCN",
+ al_filename=f"GCN_sources_{save_dateobs}",
+ write_consolidation_results=True,
+ select_top_n=True,
+ doAllSources=True,
+ write_csv=True,
+ )
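+ # select_fritz_sample returns a generator; iterate it so the consolidation step runs to completion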
+ [x for x in generator]
+
+ generator = scope.select_fritz_sample(
+ fields=[f"{save_dateobs}_specific_ids"],
+ group="nobalance_DR16_DNN",
+ algorithm="dnn",
+ probability_threshold=0.0,
+ consol_filename=f"inference_results_{save_dateobs}",
+ al_directory="GCN",
+ al_filename=f"GCN_sources_{save_dateobs}",
+ write_consolidation_results=True,
+ select_top_n=True,
+ doAllSources=True,
+ write_csv=True,
+ )
+ [x for x in generator]
+
+ except Exception as e:
+ print(f"Exception raised during select_fritz_sample: {e}")
print("Combining DNN and XGB preds...")
- os.system(
- f"{path_to_python} {BASE_DIR}/tools/combine_preds.py --dateobs {save_dateobs} --combined_preds_dirname {combined_preds_dirname}/{save_dateobs} \
- --merge_dnn_xgb --write_csv --p_threshold {p_threshold} --agg_method {agg_method} --dnn_directory {dnn_preds_directory} \
- --xgb_directory {xgb_preds_directory}"
- )
+
+ try:
+ combine_preds(
+ dateobs=save_dateobs,
+ combined_preds_dirname=f"{combined_preds_dirname}/{save_dateobs}",
+ merge_dnn_xgb=True,
+ write_csv=True,
+ p_threshold=p_threshold,
+ agg_method=agg_method,
+ dnn_directory=dnn_preds_directory,
+ xgb_directory=xgb_preds_directory,
+ )
+ except Exception as e:
+ print(f"Exception raised during combine_preds: {e}")
if not doNotPost:
print(
f"Uploading classifications with p > {p_threshold}. Posting light curves as comments."
)
- os.system(
- f"{path_to_python} {BASE_DIR}/tools/scope_upload_classification.py --file {BASE_DIR}/{combined_preds_dirname}/{save_dateobs}/merged_GCN_sources_{save_dateobs}.parquet \
- --classification read --taxonomy_map {BASE_DIR}/{taxonomy_map} --skip_phot --use_existing_obj_id --group_ids {post_group_ids_str} --radius_arcsec {radius_arcsec} \
- --p_threshold {p_threshold} --post_phot_as_comment --post_phasefolded_phot"
- )
+
+ try:
+ upload_classification(
+ file=f"{BASE_DIR}/{combined_preds_dirname}/{save_dateobs}/merged_GCN_sources_{save_dateobs}.parquet",
+ classification="read",
+ taxonomy_map=f"{BASE_DIR}/{taxonomy_map}",
+ skip_phot=True,
+ use_existing_obj_id=True,
+ group_ids=post_group_ids,
+ radius_arcsec=radius_arcsec,
+ p_threshold=p_threshold,
+ post_phot_as_comment=True,
+ post_phasefolded_phot=True,
+ )
+ except Exception as e:
+ print(f"Exception raised during upload_classification: {e}")
print(f"Finished for {dateobs}.")
@@ -296,7 +335,7 @@ def query_gcn_events(
json.dump(checkpoint_dict, f)
-if __name__ == '__main__':
+def get_parser():
parser = argparse.ArgumentParser()
parser.add_argument(
@@ -306,45 +345,45 @@ def query_gcn_events(
help="Number of days before today to query GCN events",
)
parser.add_argument(
- "--query_group_ids",
+ "--query-group-ids",
type=int,
nargs='+',
default=[],
help="group ids to query sources (all if not specified)",
)
parser.add_argument(
- "--post_group_ids",
+ "--post-group-ids",
type=int,
nargs='+',
default=[1544],
help="group ids to post source classifications (EM+GW group if not specified)",
)
parser.add_argument(
- "--days_range",
+ "--days-range",
type=float,
default=7.0,
help="max days past event to search for sources",
)
parser.add_argument(
- "--radius_arcsec",
+ "--radius-arcsec",
type=float,
default=0.5,
help="radius around new sources to search for existing ZTF sources",
)
parser.add_argument(
- "--save_filename",
+ "--save-filename",
type=str,
- default='tools/fritzDownload/specific_ids_GCN_sources',
+ default='fritzDownload/specific_ids_GCN_sources',
help="filename to save source ids/coordinates",
)
parser.add_argument(
- "--taxonomy_map",
+ "--taxonomy-map",
type=str,
default='tools/fritz_mapper.json',
help="path to taxonomy map for uploading classifications to Fritz",
)
parser.add_argument(
- "--combined_preds_dirname",
+ "--combined-preds-dirname",
type=str,
default='GCN_dnn_xgb',
help="dirname in which to save combined preds files",
@@ -356,7 +395,7 @@ def query_gcn_events(
help="If querying specific dateobs, specify here to override daysAgo.",
)
parser.add_argument(
- "--p_threshold",
+ "--p-threshold",
type=float,
default=0.7,
help="minimum classification probability to post to Fritz",
@@ -368,7 +407,7 @@ def query_gcn_events(
help="Username for compute resources (e.g. Expanse)",
)
parser.add_argument(
- "--generated_features_dirname",
+ "--generated-features-dirname",
type=str,
default='generated_features_GCN_sources',
help="dirname containing generated GCN source features",
@@ -385,48 +424,47 @@ def query_gcn_events(
help="If set, run analysis but do not post classifications. Useful for testing",
)
parser.add_argument(
- "--agg_method",
+ "--agg-method",
type=str,
default='mean',
help="Aggregation method for classification probabilities (mean or max)",
)
parser.add_argument(
- "--dnn_preds_directory",
+ "--dnn-preds-directory",
type=str,
default='GCN_dnn',
help="dirname in which dnn preds are saved",
)
parser.add_argument(
- "--xgb_preds_directory",
+ "--xgb-preds-directory",
type=str,
default='GCN_xgb',
help="dirname in which xgb preds preds are saved",
)
parser.add_argument(
- "--path_to_python",
- type=str,
- default='~/miniforge3/envs/scope-env/bin/python',
- help="path to python within scope environment (run 'which python' while your scope environment is active to find)",
- )
- parser.add_argument(
- "--checkpoint_filename",
+ "--checkpoint-filename",
type=str,
default='gcn_sources_checkpoint.json',
help="filename containing source ids already classified",
)
parser.add_argument(
- "--checkpoint_refresh_days",
+ "--checkpoint-refresh-days",
type=float,
default=180.0,
help="days after checkpoint start_date to delete json file and re-generate",
)
parser.add_argument(
- "--ignore_checkpoint",
+ "--ignore-checkpoint",
action='store_true',
help="If set, ignore current classified sources listed in checkpoint file (bool)",
)
- args = parser.parse_args()
+ return parser
+
+
+if __name__ == "__main__":
+ parser = get_parser()
+ args, _ = parser.parse_known_args()
query_gcn_events(
daysAgo=args.daysAgo,
@@ -446,7 +484,6 @@ def query_gcn_events(
agg_method=args.agg_method,
dnn_preds_directory=args.dnn_preds_directory,
xgb_preds_directory=args.xgb_preds_directory,
- path_to_python=args.path_to_python,
checkpoint_filename=args.checkpoint_filename,
checkpoint_refresh_days=args.checkpoint_refresh_days,
ignore_checkpoint=args.ignore_checkpoint,
diff --git a/tools/kowalski_query_examples.ipynb b/kowalski_query_examples.ipynb
similarity index 100%
rename from tools/kowalski_query_examples.ipynb
rename to kowalski_query_examples.ipynb
diff --git a/pyproject.toml b/pyproject.toml
index 0324e470..ff94686e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,90 @@
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
+
[tool.black]
-target-version = ['py37', 'py38']
+target-version = ['py39', 'py310', 'py311']
skip-string-normalization = true
+
+[tool.poetry]
+name = "scope-ml"
+version = "0.9.0"
+description = "SCoPe: ZTF Source Classification Project"
+readme = "README.md"
+authors = ["Brian F. Healy, Michael W. Coughlin, Ashish A. Mahabal, Theophile J. du Laz, Andrew Drake, Matthew J. Graham, Lynne A. Hillenbrand, Jan van Roestel, Paula Szkody et al."]
+maintainers = ["Brian F. Healy "]
+license = "MIT"
+repository = "https://github.com/ZwickyTransientFacility/scope"
+documentation = "https://zwickytransientfacility.github.io/scope-docs/"
+packages = [
+ {include = "scope"},
+ {include = "tools"},
+]
+exclude = ["**/*.parquet", "**/*.csv", "**/*.ipynb", "**/*.json", "**/*.h5", "**/*.yaml", "**/*.html", "**/*.txt"]
+include = ["config.defaults.yaml", "tools/golden_dataset_mapper.json", "tools/fritz_mapper.json", "tools/DNN_AL_mapper.json", "tools/XGB_AL_mapper.json", "tools/local_scope_ztfid.csv", "tools/local_scope_radec.csv"]
+
+[tool.poetry.dependencies]
+python = "^3.9, <3.12"
+deepdiff = ">=5.0"
+gsutil = ">=4.60"
+keras-tuner = ">=1.0.2"
+matplotlib = ">=3.3"
+questionary = ">=1.8.1"
+scikit-learn = ">=0.24.1"
+tensorflow = ">=2.14.0,<=2.15.0"
+wandb = ">=0.12.1"
+h5py = ">=3.10.0"
+astropy = ">=5.2.2"
+fast-histogram = ">=0.11"
+healpy = ">=1.16.2"
+jinja2 = "<=3.1"
+myst-parser = ">=0.18.1"
+pandas = ">=1.2"
+penquins = ">=2.3.1"
+pyyaml = ">=5.3.1"
+tdtax = ">=0.1.6"
+pyarrow = ">=9.0.0"
+numba = ">=0.56.4"
+numpy = ">=1.23,<1.24"
+cesium = ">=0.11.1"
+xgboost = ">=1.7.5"
+seaborn = ">=0.12.2"
+pydot = ">=1.4.2"
+notebook = ">=7.0.6"
+tables = ">=3.7,<3.9.2"
+
+[tool.poetry.dev-dependencies]
+pre-commit = ">=3.5.0"
+pytest = ">=6.1.2"
+sphinx = ">=4.2"
+sphinx-press-theme = ">=0.8.0"
+poetry = ">=1.7.1"
+
+[tool.poetry.scripts]
+scope-initialize = "scope.__init__:initialize"
+scope-develop = "scope._instantiate:develop"
+scope-lint = "scope.scope_class:Scope.lint"
+scope-doc = "scope._instantiate:doc"
+scope-train = "scope._instantiate:train"
+create-training-script = "scope._instantiate:create_training_script"
+assemble-training-stats = "scope._instantiate:assemble_training_stats"
+create-inference-script = "scope._instantiate:create_inference_script"
+select-fritz-sample = "scope._instantiate:select_fritz_sample"
+scope-test-limited = "scope._instantiate:test_limited"
+scope-test = "scope._instantiate:test"
+scope-download-classification = "tools.scope_download_classification:main"
+scope-upload-classification = "tools.scope_upload_classification:main"
+scope-manage-annotation = "tools.scope_manage_annotation:main"
+post-taxonomy = "tools.taxonomy:main"
+generate-features = "tools.generate_features:main"
+generate-features-slurm = "tools.generate_features_slurm:main"
+generate-features-job-submission = "tools.generate_features_job_submission:main"
+train-algorithm-slurm = "tools.train_algorithm_slurm:main"
+train-algorithm-job-submission = "tools.train_algorithm_job_submission:main"
+run-inference = "tools.inference:main"
+run-inference-slurm = "tools.run_inference_slurm:main"
+run-inference-job-submission = "tools.run_inference_job_submission:main"
+combine-preds = "tools.combine_preds:main"
+get-quad-ids = "tools.get_quad_ids:main"
+run-scope-local = "tools.run_scope_local:main"
+analyze-logs = "tools.analyze_logs:main"
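Assuming the published package name matches the `[tool.poetry]` metadata above, a minimal post-install sketch using these console scripts (the class tag is a placeholder; `scope-initialize` is defined in `scope/__init__.py` below):
```sh
pip install scope-ml
scope-initialize        # creates a 'scope' working directory and copies config/mapper files
cd scope
# add tokens to config.yaml, then run the entry points, e.g.:
scope-train --tag <class> --algorithm dnn
```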
diff --git a/requirements.txt b/requirements.txt
index 1c61c5a5..966080d2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,27 @@
--r .requirements/dev.txt
--r .requirements/doc.txt
+h5py>=3.10.0
+astropy>=5.2.2
+fast-histogram>=0.11
+healpy>=1.16.2
+Jinja2<=3.1
+myst-parser>=0.18.1
+pandas>=1.2
+penquins>=2.3.1
+pyyaml>=5.3.1
+tdtax>=0.1.6
+tables>=3.7,<3.9.2
+pyarrow>=9.0.0
+numba>=0.56.4
+numpy>=1.23,<1.24
+cesium>=0.11.1
+xgboost>=1.7.5
+seaborn>=0.12.2
+pydot>=1.4.2
+notebook>=7.0.6
+deepdiff>=5.0
+gsutil>=4.60
+keras-tuner>=1.0.2
+matplotlib>=3.3
+questionary>=1.8.1
+scikit-learn>=0.24.1
+tensorflow>=2.14.0,<=2.15.0
+wandb>=0.12.1
diff --git a/scope/__init__.py b/scope/__init__.py
index 0ffb151c..8956345c 100644
--- a/scope/__init__.py
+++ b/scope/__init__.py
@@ -1,13 +1,12 @@
-from .nn import *
-from .utils import *
-from .models import *
-from .fritz import *
+import shutil
+import os
+import site
# Below code adapted from https://github.com/skyportal/skyportal/blob/main/skyportal/__init__.py
# 2022-10-18
-__version__ = '0.5.dev0'
+__version__ = "0.9.0"
-if 'dev' in __version__:
+if "dev" in __version__:
# Append last commit date and hash to dev version information, if available
import subprocess
@@ -15,7 +14,7 @@
try:
p = subprocess.Popen(
- ['git', 'log', '-1', '--format="%h %aI"'],
+ ["git", "log", "-1", '--format="%h %aI"'],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
cwd=os.path.dirname(__file__),
@@ -26,15 +25,67 @@
out, err = p.communicate()
if p.returncode == 0:
git_hash, git_date = (
- out.decode('utf-8')
+ out.decode("utf-8")
.strip()
- .replace('"', '')
- .split('T')[0]
- .replace('-', '')
+ .replace('"', "")
+ .split("T")[0]
+ .replace("-", "")
.split()
)
- __version__ = '+'.join(
- [tag for tag in __version__.split('+') if not tag.startswith('git')]
+ __version__ = "+".join(
+ [tag for tag in __version__.split("+") if not tag.startswith("git")]
)
- __version__ += f'+git{git_date}.{git_hash}'
+ __version__ += f"+git{git_date}.{git_hash}"
+
+
+def initialize():
+ """create directories, copy config and data files"""
+ main_dir = "scope"
+ scope_dirs = ["tools"]
+ os.makedirs(main_dir, exist_ok=True)
+ for directory in scope_dirs:
+ os.makedirs(f"{main_dir}/{directory}", exist_ok=True)
+
+ site_packages_path = site.getsitepackages()[0]
+ default_config_name = "config.defaults.yaml"
+ copied_config_name = "config.yaml"
+ tools_dir = "tools"
+ mappers = [
+ "golden_dataset_mapper.json",
+ "fritz_mapper.json",
+ "DNN_AL_mapper.json",
+ "XGB_AL_mapper.json",
+ "local_scope_ztfid.csv",
+ "local_scope_radec.csv",
+ ]
+
+ print()
+ # Copy config defaults to new directory structure if needed
+ if not os.path.exists(f"{main_dir}/{copied_config_name}"):
+ shutil.copy(
+ f"{site_packages_path}/{default_config_name}",
+ f"{main_dir}/{default_config_name}",
+ )
+ shutil.copy(
+ f"{site_packages_path}/{default_config_name}",
+ f"{main_dir}/{copied_config_name}",
+ )
+ print(
+ f"Created new '{copied_config_name}' config file. Please customize/add tokens there before running scope."
+ )
+ else:
+ print(
+ f"Warning: {copied_config_name} already exists in the '{main_dir}' directory."
+ )
+
+ print()
+ for mapper in mappers:
+ print(f"Copying default data '{mapper}' to '{main_dir}/{tools_dir}'")
+ shutil.copy(
+ f"{site_packages_path}/{tools_dir}/{mapper}",
+ f"{main_dir}/{tools_dir}/{mapper}",
+ )
+
+ print()
+ print(f"scope-ml initialized. Run scripts from '{main_dir}' directory.")
diff --git a/scope/_instantiate.py b/scope/_instantiate.py
new file mode 100644
index 00000000..ce0b4052
--- /dev/null
+++ b/scope/_instantiate.py
@@ -0,0 +1,40 @@
+# For use by pip-installed scope package
+from scope.scope_class import Scope
+
+scope = Scope()
+
+
+def develop():
+ scope.develop()
+
+
+def doc():
+ scope.doc()
+
+
+def train():
+ scope.parse_run_train()
+
+
+def create_training_script():
+ scope.parse_run_create_training_script()
+
+
+def assemble_training_stats():
+ scope.parse_run_assemble_training_stats()
+
+
+def create_inference_script():
+ scope.parse_run_create_inference_script()
+
+
+def select_fritz_sample():
+ scope.parse_run_select_fritz_sample()
+
+
+def test_limited():
+ scope.test_limited()
+
+
+def test():
+ scope.parse_run_test()
diff --git a/scope/fritz.py b/scope/fritz.py
index bd6252a1..0df37fea 100755
--- a/scope/fritz.py
+++ b/scope/fritz.py
@@ -1,19 +1,17 @@
import urllib
import requests
-import pathlib
-import yaml
import time
from typing import Optional, Mapping
import numpy as np
import pandas as pd
from requests.exceptions import InvalidJSONError, JSONDecodeError
from urllib3.exceptions import ProtocolError
-
+from scope.utils import parse_load_config
# define the baseurl and set the fritz token to connect
-config_path = pathlib.Path(__file__).parent.parent.absolute() / "config.yaml"
-with open(config_path) as config_yaml:
- config = yaml.load(config_yaml, Loader=yaml.FullLoader)
+
+config = parse_load_config()
+
BASE_URL = f"{config['fritz']['protocol']}://{config['fritz']['host']}/"
MAX_ATTEMPTS = config['fritz']['max_attempts']
SLEEP_TIME = config['fritz']['sleep_time']
diff --git a/scope/nn.py b/scope/nn.py
index c7193077..b23b7fe2 100644
--- a/scope/nn.py
+++ b/scope/nn.py
@@ -11,7 +11,7 @@
auc,
precision_recall_curve,
)
-from scope.utils import make_confusion_matrix, plot_roc, plot_pr
+from .utils import make_confusion_matrix, plot_roc, plot_pr
import numpy as np
import wandb
import json
@@ -269,43 +269,43 @@ def build_model(
# fixme: for now, simply use Keras' Functional API
if (not dense_branch) and (not conv_branch):
- raise ValueError('model must have at least one branch')
+ raise ValueError("model must have at least one branch")
features_input = tf.keras.Input(
- shape=kwargs.get("features_input_shape", (40,)), name='features'
+ shape=kwargs.get("features_input_shape", (40,)), name="features"
)
dmdt_input = tf.keras.Input(
- shape=kwargs.get("dmdt_input_shape", (26, 26, 1)), name='dmdt'
+ shape=kwargs.get("dmdt_input_shape", (26, 26, 1)), name="dmdt"
)
# dense branch to digest features
if dense_branch:
- x_dense = tf.keras.layers.Dense(256, activation='relu', name='dense_fc_1')(
+ x_dense = tf.keras.layers.Dense(256, activation="relu", name="dense_fc_1")(
features_input
)
x_dense = tf.keras.layers.Dropout(0.25)(x_dense)
- x_dense = tf.keras.layers.Dense(32, activation='relu', name='dense_fc_2')(
+ x_dense = tf.keras.layers.Dense(32, activation="relu", name="dense_fc_2")(
x_dense
)
# CNN branch to digest dmdt
if conv_branch:
x_conv = tf.keras.layers.SeparableConv2D(
- 16, (3, 3), activation='relu', name='conv_conv_1'
+ 16, (3, 3), activation="relu", name="conv_conv_1"
)(dmdt_input)
# x_conv = tf.keras.layers.Dropout(0.25)(x_conv)
x_conv = tf.keras.layers.SeparableConv2D(
- 16, (3, 3), activation='relu', name='conv_conv_2'
+ 16, (3, 3), activation="relu", name="conv_conv_2"
)(x_conv)
x_conv = tf.keras.layers.Dropout(0.25)(x_conv)
x_conv = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x_conv)
x_conv = tf.keras.layers.SeparableConv2D(
- 32, (3, 3), activation='relu', name='conv_conv_3'
+ 32, (3, 3), activation="relu", name="conv_conv_3"
)(x_conv)
# x_conv = tf.keras.layers.Dropout(0.25)(x_conv)
x_conv = tf.keras.layers.SeparableConv2D(
- 32, (3, 3), activation='relu', name='conv_conv_4'
+ 32, (3, 3), activation="relu", name="conv_conv_4"
)(x_conv)
x_conv = tf.keras.layers.Dropout(0.25)(x_conv)
# x_conv = tf.keras.layers.MaxPooling2D(pool_size=(2, 2))(x_conv)
@@ -322,10 +322,10 @@ def build_model(
x = tf.keras.layers.Dropout(0.4)(x)
# one more dense layer?
- x = tf.keras.layers.Dense(16, activation='relu', name='fc_1')(x)
+ x = tf.keras.layers.Dense(16, activation="relu", name="fc_1")(x)
# Logistic regression to output the final score
- x = tf.keras.layers.Dense(1, activation='sigmoid', name='score')(x)
+ x = tf.keras.layers.Dense(1, activation="sigmoid", name="score")(x)
m = tf.keras.Model(inputs=[features_input, dmdt_input], outputs=x)
@@ -385,13 +385,13 @@ def assign_datasets(
val_dataset,
wandb_token,
):
- self.meta['features_input_shape'] = features_input_shape
- self.meta['train_dataset_repeat'] = train_dataset_repeat
- self.meta['val_dataset_repeat'] = val_dataset_repeat
- self.meta['steps_per_epoch_train'] = steps_per_epoch_train
- self.meta['steps_per_epoch_val'] = steps_per_epoch_val
- self.meta['train_dataset'] = train_dataset
- self.meta['val_dataset'] = val_dataset
+ self.meta["features_input_shape"] = features_input_shape
+ self.meta["train_dataset_repeat"] = train_dataset_repeat
+ self.meta["val_dataset_repeat"] = val_dataset_repeat
+ self.meta["steps_per_epoch_train"] = steps_per_epoch_train
+ self.meta["steps_per_epoch_val"] = steps_per_epoch_val
+ self.meta["train_dataset"] = train_dataset
+ self.meta["val_dataset"] = val_dataset
wandb.login(key=wandb_token)
@@ -399,7 +399,7 @@ def sweep(
self,
):
wandb.init(
- job_type='sweep',
+ job_type="sweep",
)
wandb_epochs = wandb.config.epochs
@@ -420,7 +420,7 @@ def sweep(
wandb_decay = wandb.config.decay
self.setup(
- features_input_shape=self.meta['features_input_shape'],
+ features_input_shape=self.meta["features_input_shape"],
dense_branch=wandb_dense_branch,
conv_branch=wandb_conv_branch,
dmdt_input_shape=(26, 26, 1),
@@ -440,36 +440,36 @@ def sweep(
)
self.train(
- train_dataset=self.meta['train_dataset_repeat'],
- val_dataset=self.meta['val_dataset_repeat'],
- steps_per_epoch_train=self.meta['steps_per_epoch_train'],
- steps_per_epoch_val=self.meta['steps_per_epoch_val'],
+ train_dataset=self.meta["train_dataset_repeat"],
+ val_dataset=self.meta["val_dataset_repeat"],
+ steps_per_epoch_train=self.meta["steps_per_epoch_train"],
+ steps_per_epoch_val=self.meta["steps_per_epoch_val"],
epochs=wandb_epochs,
)
- stats_train = self.evaluate(self.meta['train_dataset'], name='train', verbose=0)
- stats_val = self.evaluate(self.meta['val_dataset'], name='val', verbose=0)
+ stats_train = self.evaluate(self.meta["train_dataset"], name="train", verbose=0)
+ stats_val = self.evaluate(self.meta["val_dataset"], name="val", verbose=0)
wandb.log(
{
- 'dense_branch': wandb_dense_branch,
- 'conv_branch': wandb_conv_branch,
- 'loss': wandb_loss,
- 'optimizer': wandb_optimizer,
- 'lr': wandb_lr,
- 'momentum': wandb_momentum,
- 'monitor': wandb_monitor,
- 'patience': wandb_patience,
- 'callbacks': wandb_callbacks,
- 'run_eagerly': wandb_run_eagerly,
- 'beta_1': wandb_beta_1,
- 'beta_2': wandb_beta_2,
- 'epsilon': wandb_epsilon,
- 'amsgrad': wandb_amsgrad,
- 'decay': wandb_decay,
- 'epochs': wandb_epochs,
- 'train_loss': stats_train[0],
- 'val_loss': stats_val[0],
+ "dense_branch": wandb_dense_branch,
+ "conv_branch": wandb_conv_branch,
+ "loss": wandb_loss,
+ "optimizer": wandb_optimizer,
+ "lr": wandb_lr,
+ "momentum": wandb_momentum,
+ "monitor": wandb_monitor,
+ "patience": wandb_patience,
+ "callbacks": wandb_callbacks,
+ "run_eagerly": wandb_run_eagerly,
+ "beta_1": wandb_beta_1,
+ "beta_2": wandb_beta_2,
+ "epsilon": wandb_epsilon,
+ "amsgrad": wandb_amsgrad,
+ "decay": wandb_decay,
+ "epochs": wandb_epochs,
+ "train_loss": stats_train[0],
+ "val_loss": stats_val[0],
}
)
@@ -499,14 +499,14 @@ def train(
verbose=verbose,
)
- def evaluate(self, eval_dataset, name='test', **kwargs):
+ def evaluate(self, eval_dataset, name="test", **kwargs):
y_eval = np.concatenate([y for _, y in eval_dataset], axis=0)
y_pred = np.around(self.predict(eval_dataset, name=f"_{name}", **kwargs))
- self.meta[f'y_{name}'] = y_eval
+ self.meta[f"y_{name}"] = y_eval
# Generate confusion matrix
- self.meta[f'cm_{name}'] = confusion_matrix(y_eval, y_pred, normalize='all')
+ self.meta[f"cm_{name}"] = confusion_matrix(y_eval, y_pred, normalize="all")
return self.model.evaluate(eval_dataset, **kwargs)
@@ -514,9 +514,9 @@ def predict(self, X, name=None, **kwargs):
y_pred = self.model.predict(X)
if name is not None:
- self.meta[f'y_pred{name}'] = y_pred
+ self.meta[f"y_pred{name}"] = y_pred
else:
- self.meta['y_pred'] = y_pred
+ self.meta["y_pred"] = y_pred
return y_pred
@@ -534,7 +534,7 @@ def save(
output_path: str = "./",
output_format: str = "h5",
plot: bool = False,
- names: list = ['train', 'val', 'test'],
+ names: list = ["train", "val", "test"],
cm_include_count: bool = False,
cm_include_percent: bool = True,
annotate_scores: bool = False,
@@ -549,8 +549,8 @@ def save(
output_path.mkdir(parents=True, exist_ok=True)
output_name = self.name if not tag else tag
- if not output_name.endswith('.h5'):
- output_name += '.h5'
+ if not output_name.endswith(".h5"):
+ output_name += ".h5"
self.model.save(output_path / output_name, save_format=output_format)
stats_dct = {}
@@ -561,48 +561,48 @@ def save(
path = output_path / f"{tag}_plots" / name
if not path.exists():
path.mkdir(parents=True, exist_ok=True)
- cmpdf = tag + '_cm.pdf'
- recallpdf = tag + '_recall.pdf'
- rocpdf = tag + '_roc.pdf'
- stats_json = tag + '_stats.json'
+ cmpdf = tag + "_cm.pdf"
+ recallpdf = tag + "_recall.pdf"
+ rocpdf = tag + "_roc.pdf"
+ stats_json = tag + "_stats.json"
- if self.meta[f'cm_{name}'] is not None:
- cname = tag.split('.')[0]
+ if self.meta[f"cm_{name}"] is not None:
+ cname = tag.split(".")[0]
accuracy, precision, recall, f1_score = make_confusion_matrix(
- self.meta[f'cm_{name}'],
+ self.meta[f"cm_{name}"],
figsize=(8, 6),
cbar=False,
count=cm_include_count,
percent=cm_include_percent,
- categories=['not ' + cname, cname],
+ categories=["not " + cname, cname],
annotate_scores=annotate_scores,
)
- stats_dct['accuracy'] = accuracy
- stats_dct['precision'] = precision
- stats_dct['recall'] = recall
- stats_dct['f1_score'] = f1_score
- sns.set_context('talk')
+ stats_dct["accuracy"] = accuracy
+ stats_dct["precision"] = precision
+ stats_dct["recall"] = recall
+ stats_dct["f1_score"] = f1_score
+ sns.set_context("talk")
plt.title(cname)
- plt.savefig(path / cmpdf, bbox_inches='tight')
+ plt.savefig(path / cmpdf, bbox_inches="tight")
plt.close()
- y_compare = self.meta.get(f'y_{name}', None)
- y_pred = self.meta.get(f'y_pred_{name}', None)
+ y_compare = self.meta.get(f"y_{name}", None)
+ y_pred = self.meta.get(f"y_pred_{name}", None)
if (y_compare is not None) & (y_pred is not None):
fpr, tpr, _ = roc_curve(y_compare, y_pred)
roc_auc = auc(fpr, tpr)
precision, recall, _ = precision_recall_curve(y_compare, y_pred)
- stats_dct['roc_auc'] = roc_auc
+ stats_dct["roc_auc"] = roc_auc
plot_roc(fpr, tpr, roc_auc)
- plt.savefig(path / rocpdf, bbox_inches='tight')
+ plt.savefig(path / rocpdf, bbox_inches="tight")
plt.close()
plot_pr(recall, precision)
- plt.savefig(path / recallpdf, bbox_inches='tight')
+ plt.savefig(path / recallpdf, bbox_inches="tight")
plt.close()
- with open(path / stats_json, 'w') as f:
+ with open(path / stats_json, "w") as f:
json.dump(stats_dct, f)
diff --git a/scope.py b/scope/scope_class.py
similarity index 55%
rename from scope.py
rename to scope/scope_class.py
index 840e0a7b..b9ac4fa6 100755
--- a/scope.py
+++ b/scope/scope_class.py
@@ -1,30 +1,26 @@
#!/usr/bin/env python
from contextlib import contextmanager
import datetime
-from deepdiff import DeepDiff
-import fire
import numpy as np
import os
import pandas as pd
import pathlib
from penquins import Kowalski
-from pprint import pprint
-import questionary
import subprocess
import sys
import tdtax
from typing import Optional, Sequence, Union
-import yaml
-from scope.utils import (
+from .utils import (
forgiving_true,
- load_config,
read_hdf,
read_parquet,
write_parquet,
+ parse_load_config,
)
-from scope.fritz import radec_to_iau_name
+from .fritz import radec_to_iau_name
import json
import shutil
+import argparse
@contextmanager
@@ -46,56 +42,15 @@ def status(message):
print(f"\r[✓] {message}")
-def check_configs(config_wildcards: Sequence = ("config.*yaml",)):
- """
- - Check if config files exist
- - Offer to use the config files that match the wildcards
- - For config.yaml, check its contents against the defaults to make sure nothing is missing/wrong
-
- :param config_wildcards:
- :return:
- """
- path = pathlib.Path(__file__).parent.absolute()
-
- for config_wildcard in config_wildcards:
- config = config_wildcard.replace("*", "")
- # use config defaults if configs do not exist?
- if not (path / config).exists():
- answer = questionary.select(
- f"{config} does not exist, do you want to use one of the following"
- " (not recommended without inspection)?",
- choices=[p.name for p in path.glob(config_wildcard)],
- ).ask()
- subprocess.run(["cp", f"{path / answer}", f"{path / config}"])
-
- # check contents of config.yaml WRT config.defaults.yaml
- if config == "config.yaml":
- with open(path / config.replace(".yaml", ".defaults.yaml")) as config_yaml:
- config_defaults = yaml.load(config_yaml, Loader=yaml.FullLoader)
- with open(path / config) as config_yaml:
- config_wildcard = yaml.load(config_yaml, Loader=yaml.FullLoader)
- deep_diff = DeepDiff(config_defaults, config_wildcard, ignore_order=True)
- difference = {
- k: v for k, v in deep_diff.items() if k in ("dictionary_item_removed",)
- }
- if len(difference) > 0:
- print("config.yaml structure differs from config.defaults.yaml")
- pprint(difference)
- raise KeyError("Fix config.yaml before proceeding")
-
-
class Scope:
def __init__(self):
- # check configuration
- with status("Checking configuration"):
- check_configs(config_wildcards=["config.*yaml"])
-
- self.base_path = pathlib.Path(__file__).parent.absolute()
-
- self.config = load_config(self.base_path / "config.yaml")
+ # load configuration
+ with status("Loading configuration"):
+ self.base_path = pathlib.Path.cwd()
+ self.config = parse_load_config()
self.default_path_dataset = (
- self.base_path / self.config['training']['dataset']
+ self.base_path / self.config["training"]["dataset"]
)
# use tokens specified as env vars (if exist)
@@ -113,15 +68,15 @@ def __init__(self):
hosts = [
x
- for x in self.config['kowalski']['hosts']
- if self.config['kowalski']['hosts'][x]['token'] is not None
+ for x in self.config["kowalski"]["hosts"]
+ if self.config["kowalski"]["hosts"][x]["token"] is not None
]
instances = {
host: {
- 'protocol': self.config['kowalski']['protocol'],
- 'port': self.config['kowalski']['port'],
- 'host': f'{host}.caltech.edu',
- 'token': self.config['kowalski']['hosts'][host]['token'],
+ "protocol": self.config["kowalski"]["protocol"],
+ "port": self.config["kowalski"]["port"],
+ "host": f"{host}.caltech.edu",
+ "token": self.config["kowalski"]["hosts"][host]["token"],
}
for host in hosts
}
@@ -158,8 +113,8 @@ def _get_features(
if catalog is None:
catalog = self.config["kowalski"]["collections"]["features"]
- period_colname = 'period'
- if not ((period_suffix is None) | (period_suffix == 'None')):
+ period_colname = "period"
+ if not ((period_suffix is None) | (period_suffix == "None")):
period_colname = f"{period_colname}_{period_suffix}"
features_dct = {}
@@ -186,7 +141,7 @@ def _get_features(
if len(responses[name]) > 0:
response = responses[name]
if response.get("status", "error") == "success":
- features_response = response.get('data').get(catalog)
+ features_response = response.get("data").get(catalog)
features_dct.update(features_response)
features_nearest = [v[0] for k, v in features_response.items() if len(v) > 0]
df = pd.DataFrame.from_records(features_nearest)
@@ -246,7 +201,7 @@ def _get_nearest_gaia(
if len(responses[name]) > 0:
response = responses[name]
if response.get("status", "error") == "success":
- gaia_response = response.get('data').get(catalog)
+ gaia_response = response.get("data").get(catalog)
gaia_dct.update(gaia_response)
gaia_nearest = [v[0] for k, v in gaia_dct.items() if len(v) > 0]
df = pd.DataFrame.from_records(gaia_nearest)
@@ -320,7 +275,7 @@ def _get_light_curve_data(
if len(responses[name]) > 0:
response = responses[name]
if response.get("status", "error") == "success":
- lcs = response.get('data').get(catalog).get('target')
+ lcs = response.get("data").get(catalog).get("target")
light_curves_raw += lcs
light_curves = []
@@ -368,7 +323,7 @@ def lint(cls):
def doc(self):
"""Build docs"""
- from scope.utils import (
+ from .utils import (
make_tdtax_taxonomy,
plot_gaia_density,
plot_gaia_hr,
@@ -376,11 +331,11 @@ def doc(self):
plot_periods,
)
- period_suffix_config = self.config['features']['info']['period_suffix']
+ period_suffix_config = self.config["features"]["info"]["period_suffix"]
# generate taxonomy.html
with status("Generating taxonomy visualization"):
- path_static = pathlib.Path(__file__).parent.absolute() / "doc" / "_static"
+ path_static = self.base_path / "doc" / "_static"
if not path_static.exists():
path_static.mkdir(parents=True, exist_ok=True)
tdtax.write_viz(
@@ -410,10 +365,10 @@ def doc(self):
# example periods
with status("Generating example period histograms"):
- path_doc_data = pathlib.Path(__file__).parent.absolute() / "doc" / "data"
+ path_doc_data = self.base_path / "doc" / "data"
# stored as ra/decs in csv format under /data/golden
- golden_sets = pathlib.Path(__file__).parent.absolute() / "data" / "golden"
+ golden_sets = self.base_path / "data" / "golden"
for golden_set in golden_sets.glob("*.csv"):
golden_set_name = golden_set.stem
positions = pd.read_csv(golden_set).to_numpy().tolist()
@@ -438,15 +393,11 @@ def doc(self):
# example skymaps for all Golden sets
with status("Generating skymaps diagrams for Golden sets"):
- path_doc_data = pathlib.Path(__file__).parent.absolute() / "doc" / "data"
+ path_doc_data = self.base_path / "doc" / "data"
- path_gaia_density = (
- pathlib.Path(__file__).parent.absolute()
- / "data"
- / "Gaia_hp8_densitymap.fits"
- )
+ path_gaia_density = self.base_path / "data" / "Gaia_hp8_densitymap.fits"
# stored as ra/decs in csv format under /data/golden
- golden_sets = pathlib.Path(__file__).parent.absolute() / "data" / "golden"
+ golden_sets = self.base_path / "data" / "golden"
for golden_set in golden_sets.glob("*.csv"):
golden_set_name = golden_set.stem
positions = pd.read_csv(golden_set).to_numpy().tolist()
@@ -459,7 +410,7 @@ def doc(self):
# example light curves
with status("Generating example light curves"):
- path_doc_data = pathlib.Path(__file__).parent.absolute() / "doc" / "data"
+ path_doc_data = self.base_path / "doc" / "data"
for sample_object_name, sample_object in self.config["docs"][
"field_guide"
@@ -479,13 +430,10 @@ def doc(self):
# example HR diagrams for all Golden sets
with status("Generating HR diagrams for Golden sets"):
path_gaia_hr_histogram = (
- pathlib.Path(__file__).parent.absolute()
- / "doc"
- / "data"
- / "gaia_hr_histogram.dat"
+ self.base_path / "doc" / "data" / "gaia_hr_histogram.dat"
)
# stored as ra/decs in csv format under /data/golden
- golden_sets = pathlib.Path(__file__).parent.absolute() / "data" / "golden"
+ golden_sets = self.base_path / "data" / "golden"
for golden_set in golden_sets.glob("*.csv"):
golden_set_name = golden_set.stem
positions = pd.read_csv(golden_set).to_numpy().tolist()
@@ -503,11 +451,11 @@ def doc(self):
@staticmethod
def fetch_models(gcs_path: str = "gs://ztf-scope/models"):
"""
- Fetch SCoPe models from GCP
+ (deprecated) Fetch SCoPe models from GCP
:return:
"""
- path_models = pathlib.Path(__file__).parent / "models"
+ path_models = pathlib.Path.cwd() / "models"
if not path_models.exists():
path_models.mkdir(parents=True, exist_ok=True)
@@ -527,11 +475,11 @@ def fetch_models(gcs_path: str = "gs://ztf-scope/models"):
@staticmethod
def fetch_datasets(gcs_path: str = "gs://ztf-scope/datasets"):
"""
- Fetch SCoPe datasets from GCP
+ (deprecated) Fetch SCoPe datasets from GCP
:return:
"""
- path_datasets = pathlib.Path(__file__).parent / "data" / "training"
+ path_datasets = pathlib.Path.cwd() / "data" / "training"
if not path_datasets.exists():
path_datasets.mkdir(parents=True, exist_ok=True)
@@ -548,26 +496,310 @@ def fetch_datasets(gcs_path: str = "gs://ztf-scope/datasets"):
if p.returncode != 0:
raise RuntimeError("Failed to fetch SCoPe datasets")
+ def parse_run_train(self):
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--tag",
+ type=str,
+ help="classifier designation, refers to 'class' in config.taxonomy",
+ )
+ parser.add_argument(
+ "--path-dataset",
+ type=str,
+ help="local path to .parquet, .h5 or .csv file with the dataset",
+ )
+ parser.add_argument(
+ "--algorithm",
+ type=str,
+ default="dnn",
+ help="name of ML algorithm to use",
+ )
+ parser.add_argument(
+ "--gpu",
+ type=int,
+ help="GPU id to use, zero-based. check tf.config.list_physical_devices('GPU') for available devices",
+ )
+ parser.add_argument(
+ "--verbose",
+ action="store_true",
+ help="if set, print verbose output",
+ )
+ parser.add_argument(
+ "--job-type",
+ type=str,
+ default="train",
+ help="name of job type for WandB",
+ )
+ parser.add_argument(
+ "--group",
+ type=str,
+ default="experiment",
+ help="name of group for WandB",
+ )
+ parser.add_argument(
+ "--run-sweeps",
+ action="store_true",
+ help="if set, run WandB sweeps instead of training",
+ )
+ parser.add_argument(
+ "--period-suffix",
+ type=str,
+ help="suffix of period/Fourier features to use for training",
+ )
+ parser.add_argument(
+ "--threshold",
+ type=float,
+ help="classification threshold separating positive from negative examples",
+ )
+ parser.add_argument(
+ "--balance",
+ type=float,
+ default=-1,
+ help="factor by which to weight majority vs. minority examples",
+ )
+ parser.add_argument(
+ "--weight-per-class",
+ action="store_true",
+ help="if set, weight training data based on fraction of positive/negative samples",
+ )
+ parser.add_argument(
+ "--scale-features",
+ type=str,
+ help="method by which to scale input features (min_max or median_std)",
+ )
+ parser.add_argument(
+ "--test-size",
+ type=float,
+ help="fractional size of test set, taken from initial learning set",
+ )
+ parser.add_argument(
+ "--val-size",
+ type=float,
+ help="fractional size of val set, taken from initial learning set less test set",
+ )
+ parser.add_argument(
+ "--random-state",
+ type=int,
+ help="random seed to set for reproducibility",
+ )
+ parser.add_argument(
+ "--feature-stats",
+ type=str,
+ help="feature stats to use to standardize features. If set to 'config', source feature stats from values in config file. Otherwise, compute them from data, taking balance into account",
+ )
+ parser.add_argument(
+ "--batch-size",
+ type=int,
+ help="batch size to use for training",
+ )
+ parser.add_argument(
+ "--shuffle-buffer-size",
+ type=int,
+ help="buffer size to use when shuffling training set",
+ )
+ parser.add_argument(
+ "--epochs",
+ type=int,
+ help="number of training epochs",
+ )
+ parser.add_argument(
+ "--float-convert-types",
+ type=int,
+ nargs=2,
+ help="convert floats from a to b bits (e.g. 64 32)",
+ )
+ parser.add_argument(
+ "--lr",
+ type=float,
+ help="dnn learning rate",
+ )
+ parser.add_argument(
+ "--beta-1",
+ type=float,
+ help="dnn beta_1",
+ )
+ parser.add_argument(
+ "--beta-2",
+ type=float,
+ help="dnn beta_2",
+ )
+ parser.add_argument(
+ "--epsilon",
+ type=float,
+ help="dnn epsilon",
+ )
+ parser.add_argument(
+ "--decay",
+ type=float,
+ help="dnn decay",
+ )
+ parser.add_argument(
+ "--momentum",
+ type=float,
+ help="dnn momentum",
+ )
+ parser.add_argument(
+ "--monitor",
+ type=str,
+ help="dnn monitor quantity",
+ )
+ parser.add_argument(
+ "--patience",
+ type=int,
+ help="dnn patience (in epochs)",
+ )
+ parser.add_argument(
+ "--callbacks",
+ type=str,
+ nargs="+",
+ help="dnn callbacks",
+ )
+ parser.add_argument(
+ "--run-eagerly",
+ action="store_true",
+ help="dnn run_eagerly",
+ )
+ parser.add_argument(
+ "--pre-trained-model",
+ type=str,
+ help="name of dnn pre-trained model to load, if any",
+ )
+ parser.add_argument(
+ "--save",
+ action="store_true",
+ help="if set, save trained model",
+ )
+ parser.add_argument(
+ "--plot",
+ action="store_true",
+ help="if set, generate/save diagnostic training plots",
+ )
+ parser.add_argument(
+ "--weights-only",
+ action="store_true",
+ help="if set and pre-trained model specified, load only weights",
+ )
+ parser.add_argument(
+ "--skip-cv",
+ action="store_true",
+ help="if set, skip XGB cross-validation",
+ )
+
+ args, _ = parser.parse_known_args()
+ self.train(**vars(args))
+
+ # args to add for ds.make (override config-specified values)
+ # threshold
+ # balance
+ # weight_per_class (test this to make sure it works as intended)
+ # scale_features
+ # test_size
+ # val_size
+ # random_state
+ # feature_stats
+ # batch_size
+ # shuffle_buffer_size
+ # epochs
+ # float_convert_types
+
+ # Args to add with descriptions (or references to tf docs)
+ # lr
+ # beta_1
+ # beta_2
+ # epsilon
+ # decay
+ # amsgrad
+ # momentum
+ # monitor
+ # patience
+ # callbacks
+ # run_eagerly
+ # pre_trained_model
+ # save
+ # plot
+ # weights_only
+
def train(
self,
tag: str,
path_dataset: Union[str, pathlib.Path] = None,
- algorithm: str = 'DNN',
+ algorithm: str = "dnn",
gpu: Optional[int] = None,
verbose: bool = False,
- job_type: str = 'train',
- group: str = 'experiment',
+ job_type: str = "train",
+ group: str = "experiment",
run_sweeps: bool = False,
+ period_suffix: str = None,
+ threshold: float = 0.7,
+ balance: Union[float, str] = -1,
+ weight_per_class=False,
+ scale_features: str = "min_max",
+ test_size: float = 0.1,
+ val_size: float = 0.1,
+ random_state: int = 42,
+ feature_stats: str = None,
+ batch_size: int = 64,
+ shuffle_buffer_size: int = 512,
+ epochs: int = 100,
+ float_convert_types: list = [64, 32],
+ lr: float = 3e-4,
+ beta_1: float = 0.9,
+ beta_2: float = 0.999,
+ epsilon: float = 1e-7,
+ decay: float = 0.0,
+ amsgrad: float = 3e-4,
+ momentum: float = 0.9,
+ monitor: str = "val_loss",
+ patience: int = 20,
+ callbacks: list = ["reduce_lr_on_plateau", "early_stopping"],
+ run_eagerly: bool = False,
+ pre_trained_model: str = None,
+ save: bool = False,
+ plot: bool = False,
+ weights_only: bool = False,
+ skip_cv: bool = False,
**kwargs,
):
"""Train classifier
- :param tag: classifier designation, refers to "class" in config.taxonomy
- :param path_dataset: local path to .parquet, .h5 or .csv file with the dataset
- :param algorithm: name of ML algorithm to use
- :param gpu: GPU id to use, zero-based. check tf.config.list_physical_devices('GPU') for available devices
- :param verbose:
- :param kwargs: refer to utils.DNN.setup and utils.Dataset.make
+ :param tag: classifier designation, refers to "class" in config.taxonomy (str)
+ :param path_dataset: local path to .parquet, .h5 or .csv file with the dataset (str)
+ :param algorithm: name of ML algorithm to use (str)
+ :param gpu: GPU id to use, zero-based. check tf.config.list_physical_devices('GPU') for available devices (int)
+ :param verbose: if set, print verbose output (bool)
+ :param job_type: name of job type for WandB (str)
+ :param group: name of group for WandB (str)
+ :param run_sweeps: if set, run WandB sweeps instead of training (bool)
+ :param period_suffix: suffix of period/Fourier features to use for training (str)
+ :param threshold: classification threshold separating positive from negative examples (float)
+ :param balance: factor by which to weight majority vs. minority examples (float or None)
+ :param weight_per_class: if set, weight training data based on fraction of positive/negative samples (bool)
+ :param scale_features: method by which to scale input features [min_max or median_std] (str)
+ :param test_size: fractional size of test set, taken from initial learning set (float)
+ :param val_size: fractional size of val set, taken from learning set less test set (float)
+ :param random_state: random seed to set for reproducibility (int)
+ :param feature_stats: feature stats to use to standardize features. If set to 'config', source feature stats from values in config file. Otherwise, compute them from data, taking balance into account (str)
+ :param batch_size: batch size to use for training (int)
+ :param shuffle_buffer_size: buffer size to use when shuffling training set (int)
+ :param epochs: number of training epochs (int)
+ :param float_convert_types: convert from a-bit to b-bit [e.g. 64 to 32] (list)
+ :param lr: dnn learning rate (float)
+ :param beta_1: dnn beta_1 (float)
+ :param beta_2: dnn beta_2 (float)
+ :param epsilon: dnn epsilon (float)
+ :param decay: dnn decay (float)
+ :param amsgrad: dnn amsgrad (float)
+ :param momentum: dnn momentum (float)
+ :param monitor: dnn monitor quantity (str)
+ :param patience: dnn patience [in epochs] (int)
+ :param callbacks: dnn callbacks (list)
+ :param run_eagerly: dnn run_eagerly (bool)
+ :param pre_trained_model: name of dnn pre-trained model to load, if any (str)
+ :param save: if set, save trained model (bool)
+ :param plot: if set, generate/save diagnostic training plots (bool)
+ :param weights_only: if set and pre-trained model specified, load only weights (bool)
+ :param skip_cv: if set, skip XGB cross-validation (bool)
+
:return:
"""
@@ -584,36 +816,36 @@ def train(
import wandb
from wandb.keras import WandbCallback
- from scope.nn import DNN
- from scope.xgb import XGB
- from scope.utils import Dataset
+ from .nn import DNN
+ from .xgb import XGB
+ from .utils import Dataset
if path_dataset is None:
path_dataset = self.default_path_dataset
- label_params = self.config["training"]["classes"][tag]
- train_config_xgb = self.config["training"]['xgboost']
+ config_params = self.config["training"]["classes"][tag]
+ train_config_dnn = self.config["training"]["dnn"]
+ train_config_xgb = self.config["training"]["xgboost"]
- period_suffix = kwargs.get(
- 'period_suffix', self.config['features']['info']['period_suffix']
- )
+ if period_suffix is None:
+ period_suffix = self.config["features"]["info"]["period_suffix"]
- if algorithm in ['DNN', 'NN', 'dnn', 'nn']:
- algorithm = 'dnn'
- elif algorithm in ['XGB', 'xgb', 'XGBoost', 'xgboost', 'XGBOOST']:
- algorithm = 'xgb'
+ if algorithm in ["DNN", "NN", "dnn", "nn"]:
+ algorithm = "dnn"
+ elif algorithm in ["XGB", "xgb", "XGBoost", "xgboost", "XGBOOST"]:
+ algorithm = "xgb"
else:
- raise ValueError('Current supported algorithms are DNN and XGB.')
+ raise ValueError("Current supported algorithms are DNN and XGB.")
- all_features = self.config["features"][label_params["features"]]
+ all_features = self.config["features"][config_params["features"]]
features = [
key for key in all_features if forgiving_true(all_features[key]["include"])
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
- periodic_bool = [all_features[x]['periodic'] for x in features]
+ if not ((period_suffix is None) | (period_suffix == "None")):
+ periodic_bool = [all_features[x]["periodic"] for x in features]
for j, name in enumerate(features):
if periodic_bool[j]:
- features[j] = f'{name}_{period_suffix}'
+ features[j] = f"{name}_{period_suffix}"
ds = Dataset(
tag=tag,
@@ -621,32 +853,36 @@ def train(
features=features,
verbose=verbose,
algorithm=algorithm,
- **kwargs,
- )
-
- label = label_params["label"]
-
- # values from kwargs override those defined in config. if latter is absent, use reasonable default
- threshold = kwargs.get("threshold", label_params.get("threshold", 0.5))
- balance = kwargs.get("balance", label_params.get("balance", None))
- weight_per_class = kwargs.get(
- "weight_per_class", label_params.get("weight_per_class", False)
+ period_suffix=period_suffix,
)
- scale_features = kwargs.get("scale_features", "min_max")
- test_size = kwargs.get("test_size", label_params.get("test_size", 0.1))
- val_size = kwargs.get("val_size", label_params.get("val_size", 0.1))
- random_state = kwargs.get("random_state", label_params.get("random_state", 42))
- feature_stats = kwargs.get("feature_stats", None)
- if feature_stats == 'config':
+ label = config_params["label"]
+
+ # values from argparse args override those defined in config. if latter is absent, use reasonable default
+ if threshold is None:
+ threshold = config_params.get("threshold", 0.7)
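+ # balance == -1 marks 'not provided'; fall back to the config value (which may be None)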
+ if balance == -1:
+ balance = config_params.get("balance", None)
+ if not weight_per_class:
+ weight_per_class = config_params.get("weight_per_class", False)
+ if scale_features is None:
+ scale_features = config_params.get("scale_features", "min_max")
+ if test_size is None:
+ test_size = config_params.get("test_size", 0.1)
+ if val_size is None:
+ val_size = config_params.get("val_size", 0.1)
+ if random_state is None:
+ random_state = config_params.get("random_state", 42)
+ if feature_stats == "config":
feature_stats = self.config.get("feature_stats", None)
-
- batch_size = kwargs.get("batch_size", label_params.get("batch_size", 64))
- shuffle_buffer_size = kwargs.get(
- "shuffle_buffer_size", label_params.get("shuffle_buffer_size", 512)
- )
- epochs = kwargs.get("epochs", label_params.get("epochs", 100))
- float_convert_types = kwargs.get("float_convert_types", (64, 32))
+ if batch_size is None:
+ batch_size = config_params.get("batch_size", 64)
+ if shuffle_buffer_size is None:
+ shuffle_buffer_size = config_params.get("shuffle_buffer_size", 512)
+ if epochs is None:
+ epochs = config_params.get("epochs", 100)
+ if float_convert_types is None:
+ float_convert_types = config_params.get("float_convert_types", [64, 32])
datasets, indexes, steps_per_epoch, class_weight = ds.make(
target_label=label,
@@ -664,32 +900,53 @@ def train(
float_convert_types=float_convert_types,
)
- # Define default hyperparameters for model
- dense_branch = kwargs.get("dense_branch", True)
- conv_branch = kwargs.get("conv_branch", True)
- loss = kwargs.get("loss", "binary_crossentropy")
- optimizer = kwargs.get("optimizer", "adam")
- lr = float(kwargs.get("lr", 3e-4))
- beta_1 = kwargs.get("beta_1", 0.9)
- beta_2 = kwargs.get("beta_2", 0.999)
- epsilon = kwargs.get("epsilon", 1e-7) # None?
- decay = kwargs.get("decay", 0.0)
- amsgrad = kwargs.get("amsgrad", 3e-4)
- momentum = float(kwargs.get("momentum", 0.9))
- monitor = kwargs.get("monitor", "val_loss")
- patience = int(kwargs.get("patience", 20))
- callbacks = kwargs.get("callbacks", ("reduce_lr_on_plateau", "early_stopping"))
- run_eagerly = kwargs.get("run_eagerly", False)
- pre_trained_model = kwargs.get("pre_trained_model")
- save = kwargs.get("save", False)
- plot = kwargs.get("plot", False)
- weights_only = kwargs.get("weights_only", False)
- skip_cv = kwargs.get("skip_cv", False)
+ if lr is None:
+ lr = float(config_params.get("lr", 3e-4))
+ if beta_1 is None:
+ beta_1 = float(config_params.get("beta_1", 0.9))
+ if beta_2 is None:
+ beta_2 = float(config_params.get("beta_2", 0.999))
+ if epsilon is None:
+ epsilon = float(config_params.get("epsilon", 1e-7))
+ if decay is None:
+ decay = float(config_params.get("decay", 0.0))
+ if amsgrad is None:
+ amsgrad = forgiving_true(config_params.get("amsgrad", False))
+ if momentum is None:
+ momentum = float(config_params.get("momentum", 0.9))
+ if monitor is None:
+ monitor = config_params.get("monitor", "val_loss")
+ if patience is None:
+ patience = int(config_params.get("patience", 20))
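+ # Fall back to config-defined callbacks when none are passed; normalize to a tuple either way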
+ if callbacks is None:
+ callbacks = tuple(
+ config_params.get(
+ "callbacks", ["reduce_lr_on_plateau", "early_stopping"]
+ )
+ )
+ else:
+ callbacks = tuple(callbacks)
+ if not run_eagerly:
+ run_eagerly = config_params.get("run_eagerly", False)
+ if pre_trained_model is None:
+ pre_trained_model = config_params.get("pre_trained_model")
+ if not save:
+ save = config_params.get("save", False)
+ if not plot:
+ plot = config_params.get("plot", False)
+ if not weights_only:
+ weights_only = config_params.get("weights_only", False)
+
+ # Define default parameters for all DNN models
+ dense_branch = train_config_dnn.get("dense_branch", True)
+ conv_branch = train_config_dnn.get("conv_branch", True)
+ loss = train_config_dnn.get("loss", "binary_crossentropy")
+ optimizer = train_config_dnn.get("optimizer", "adam")
# xgb-specific arguments (descriptions adapted from https://xgboost.readthedocs.io/en/stable/parameter.html and https://xgboost.readthedocs.io/en/stable/python/python_api.html)
# max_depth: maximum depth of a tree
- max_depth_config = train_config_xgb['gridsearch_params_start_stop_step'].get(
- 'max_depth', [3, 8, 2]
+ max_depth_config = train_config_xgb["gridsearch_params_start_stop_step"].get(
+ "max_depth", [3, 8, 2]
)
max_depth_start = max_depth_config[0]
max_depth_stop = max_depth_config[1]
@@ -697,66 +954,68 @@ def train(
# min_child_weight: minimum sum of instance weight (hessian) needed in a child
min_child_weight_config = train_config_xgb[
- 'gridsearch_params_start_stop_step'
- ].get('min_child_weight', [1, 6, 2])
+ "gridsearch_params_start_stop_step"
+ ].get("min_child_weight", [1, 6, 2])
min_child_weight_start = min_child_weight_config[0]
min_child_weight_stop = min_child_weight_config[1]
min_child_weight_step = min_child_weight_config[2]
# eta = kwargs.get("xgb_eta", 0.1)
- eta_list = train_config_xgb['other_training_params'].get(
- 'eta_list', [0.3, 0.2, 0.1, 0.05]
+ eta_list = train_config_xgb["other_training_params"].get(
+ "eta_list", [0.3, 0.2, 0.1, 0.05]
)
# subsample: Subsample ratio of the training instances (setting to 0.5 means XGBoost would randomly sample half of the training data prior to growing trees)
- # subsample = kwargs.get("xgb_subsample", 0.7)
- subsample_config = train_config_xgb['gridsearch_params_start_stop_step'].get(
- 'subsample', [6, 11, 2]
+ subsample_config = train_config_xgb["gridsearch_params_start_stop_step"].get(
+ "subsample", [6, 11, 2]
)
subsample_start = subsample_config[0]
subsample_stop = subsample_config[1]
subsample_step = subsample_config[2]
# colsample_bytree: subsample ratio of columns when constructing each tree.
- # colsample_bytree = kwargs.get("xgb_colsample_bytree", 0.7)
colsample_bytree_config = train_config_xgb[
- 'gridsearch_params_start_stop_step'
- ].get('subsample', [6, 11, 2])
+ "gridsearch_params_start_stop_step"
+ ].get("colsample_bytree", [6, 11, 2])
colsample_bytree_start = colsample_bytree_config[0]
colsample_bytree_stop = colsample_bytree_config[1]
colsample_bytree_step = colsample_bytree_config[2]
# confusion matrix plotting parameters:
- cm_include_count = kwargs.get("cm_include_count", False)
- cm_include_percent = kwargs.get("cm_include_percent", True)
- annotate_scores = kwargs.get("annotate_scores", False)
+ cm_include_count = train_config_xgb["plot_params"].get(
+ "cm_include_count", False
+ )
+ cm_include_percent = train_config_xgb["plot_params"].get(
+ "cm_include_percent", True
+ )
+ annotate_scores = train_config_xgb["plot_params"].get("annotate_scores", False)
# seed: random seed
- seed = train_config_xgb['other_training_params'].get('seed', 42)
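+ # Reuse the train/val/test split random_state so cross-validation folds are reproducible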
+ seed = random_state
# nfold: number of folds during cross-validation
- nfold = train_config_xgb['other_training_params'].get('nfold', 5)
+ nfold = train_config_xgb["other_training_params"].get("nfold", 5)
# metrics: evaluation metrics to use during cross-validation
- metrics = train_config_xgb['other_training_params'].get('metrics', ['auc'])
+ metrics = train_config_xgb["other_training_params"].get("metrics", ["auc"])
# objective: name of learning objective
- objective = train_config_xgb['other_training_params'].get(
+ objective = train_config_xgb["other_training_params"].get(
"objective", "binary:logistic"
)
# eval_metric: Evaluation metrics for validation data
- eval_metric = train_config_xgb['other_training_params'].get(
+ eval_metric = train_config_xgb["other_training_params"].get(
"eval_metric", "auc"
)
# early_stopping_rounds: Validation metric needs to improve at least once in every early_stopping_rounds round(s) to continue training
- early_stopping_rounds = train_config_xgb['other_training_params'].get(
+ early_stopping_rounds = train_config_xgb["other_training_params"].get(
"early_stopping_rounds", 10
)
# num_boost_round: Number of boosting iterations
- num_boost_round = train_config_xgb['other_training_params'].get(
+ num_boost_round = train_config_xgb["other_training_params"].get(
"num_boost_round", 999
)
@@ -765,14 +1024,16 @@ def train(
conv_branch = forgiving_true(conv_branch)
run_eagerly = forgiving_true(run_eagerly)
save = forgiving_true(save)
+ plot = forgiving_true(plot)
+ cm_include_count = forgiving_true(cm_include_count)
+ cm_include_percent = forgiving_true(cm_include_percent)
+ annotate_scores = forgiving_true(annotate_scores)
time_tag = datetime.datetime.utcnow().strftime("%Y%m%d_%H%M%S")
- output_path = (
- pathlib.Path(__file__).parent.absolute() / f"models_{algorithm}" / group
- )
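+ # Save trained models under the scope base path: models_<algorithm>/<group>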
+ output_path = self.base_path / f"models_{algorithm}" / group
- if algorithm == 'dnn':
+ if algorithm == "dnn":
classifier = DNN(name=tag)
@@ -786,17 +1047,17 @@ def train(
steps_per_epoch_val=steps_per_epoch["val"],
train_dataset=datasets["train"],
val_dataset=datasets["val"],
- wandb_token=self.config['wandb']['token'],
+ wandb_token=self.config["wandb"]["token"],
)
wandb.login(key=self.config["wandb"]["token"])
# Define sweep config
- sweep_configuration = self.config['wandb']['sweep_config_dnn']
- sweep_configuration['name'] = f"{group}-{tag}-{time_tag}"
+ sweep_configuration = self.config["wandb"]["sweep_config_dnn"]
+ sweep_configuration["name"] = f"{group}-{tag}-{time_tag}"
- entity = self.config['wandb']['entity']
- project = self.config['wandb']['project']
+ entity = self.config["wandb"]["entity"]
+ project = self.config["wandb"]["project"]
# Set up sweep/id
sweep_id = wandb.sweep(
@@ -808,27 +1069,27 @@ def train(
wandb.agent(sweep_id, function=classifier.sweep)
print(
- 'Sweep complete. Adjust hyperparameters in config file and run scope.py train again without the --run_sweeps flag.'
+ "Sweep complete. Adjust hyperparameters in config file and run scope-train again without the --run-sweeps flag."
)
# Stop sweep job
try:
- print('Stopping sweep.')
+ print("Stopping sweep.")
os.system(
- f'python -m wandb sweep --stop {entity}/{project}/{sweep_id}'
+ f"python -m wandb sweep --stop {entity}/{project}/{sweep_id}"
)
except Exception:
- print('Sweep already stopped.')
+ print("Sweep already stopped.")
return
if pre_trained_model is not None:
classifier.load(pre_trained_model, weights_only=weights_only)
model_input = classifier.model.input
- training_set_inputs = datasets['train'].element_spec[0]
+ training_set_inputs = datasets["train"].element_spec[0]
# Compare input shapes with model inputs
print(
- 'Comparing shapes of input features with inputs for existing model...'
+ "Comparing shapes of input features with inputs for existing model..."
)
for inpt in model_input:
inpt_name = inpt.name
@@ -836,7 +1097,7 @@ def train(
inpt_shape.assert_is_compatible_with(
training_set_inputs[inpt_name].shape
)
- print('Input shapes are consistent.')
+ print("Input shapes are consistent.")
classifier.set_callbacks(callbacks, tag, **kwargs)
else:
@@ -913,17 +1174,17 @@ def train(
verbose=verbose,
)
- elif algorithm == 'xgb':
+ elif algorithm == "xgb":
# XGB-specific code
- X_train = ds.df_ds.loc[indexes['train']][features]
- y_train = ds.target[indexes['train']]
+ X_train = ds.df_ds.loc[indexes["train"]][features]
+ y_train = ds.target[indexes["train"]]
- X_val = ds.df_ds.loc[indexes['val']][features]
- y_val = ds.target[indexes['val']]
+ X_val = ds.df_ds.loc[indexes["val"]][features]
+ y_val = ds.target[indexes["val"]]
- X_test = ds.df_ds.loc[indexes['test']][features]
- y_test = ds.target[indexes['test']]
+ X_test = ds.df_ds.loc[indexes["test"]][features]
+ y_test = ds.target[indexes["test"]]
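+ # Class-weight ratio used to weight positive examples (XGBoost's scale_pos_weight)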
scale_pos_weight = class_weight[1] / class_weight[0]
@@ -968,27 +1229,27 @@ def train(
if verbose:
print("Evaluating on train/val/test sets:")
# TODO: there should not need to be this algorithm-based split in the call to classifier.evaluate()
- if algorithm == 'xgb':
- stats_train = classifier.evaluate(X_train, y_train, name='train')
- stats_val = classifier.evaluate(X_val, y_val, name='val')
- stats_test = classifier.evaluate(X_test, y_test, name='test')
+ if algorithm == "xgb":
+ stats_train = classifier.evaluate(X_train, y_train, name="train")
+ stats_val = classifier.evaluate(X_val, y_val, name="val")
+ stats_test = classifier.evaluate(X_test, y_test, name="test")
else:
stats_train = classifier.evaluate(
- datasets["train"], name='train', verbose=verbose
+ datasets["train"], name="train", verbose=verbose
)
stats_val = classifier.evaluate(
- datasets["val"], name='val', verbose=verbose
+ datasets["val"], name="val", verbose=verbose
)
stats_test = classifier.evaluate(
- datasets["test"], name='test', verbose=verbose
+ datasets["test"], name="test", verbose=verbose
)
- print('training stats: ', stats_train)
- print('validation stats: ', stats_val)
+ print("training stats: ", stats_train)
+ print("validation stats: ", stats_val)
if verbose:
- print('test stats: ', stats_test)
+ print("test stats: ", stats_test)
- if algorithm == 'DNN':
+ if algorithm == "DNN":
param_names = (
"loss",
"tp",
@@ -1043,16 +1304,66 @@ def train(
return time_tag
+ def parse_run_create_training_script(self):
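+ """Parse command-line arguments for create-training-script and run create_training_script."""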
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--filename",
+ type=str,
+ default="train_script.sh",
+ help="filename of shell script (must not currently exist)",
+ )
+ parser.add_argument(
+ "--algorithm",
+ type=str,
+ default="dnn",
+ help="name of algorithm to use for training",
+ )
+ parser.add_argument(
+ "--min-count",
+ type=int,
+ default=100,
+ help="minimum number of positive examples to include in script",
+ )
+ parser.add_argument(
+ "--path-dataset",
+ type=str,
+ help="local path to .parquet, .h5 or .csv file with the dataset, if not provided in config.yaml",
+ )
+ parser.add_argument(
+ "--pre-trained-group-name",
+ type=str,
+ help="name of group containing pre-trained models within models directory",
+ )
+ parser.add_argument(
+ "--add-keywords",
+ type=str,
+ default="",
+ help="str containing additional training keywords to append to each line in the script",
+ )
+ parser.add_argument(
+ "--train-all",
+ action="store_true",
+ help="if group_name is specified, set this keyword to train all classes regardless of whether a trained model exists",
+ )
+ parser.add_argument(
+ "--period-suffix",
+ type=str,
+ help="suffix of period/Fourier features to use for training",
+ )
+
+ args, _ = parser.parse_known_args()
+ self.create_training_script(**vars(args))
+
def create_training_script(
self,
- filename: str = 'train_script.sh',
- algorithm: str = 'dnn',
+ filename: str = "train_script.sh",
+ algorithm: str = "dnn",
min_count: int = 100,
path_dataset: str = None,
pre_trained_group_name: str = None,
- add_keywords: str = '',
+ add_keywords: str = "",
train_all: bool = False,
- **kwargs,
+ period_suffix: str = None,
):
"""
Create training shell script from classes in config file meeting minimum count requirement
@@ -1064,63 +1375,61 @@ def create_training_script(
:param pre_trained_group_name: name of group containing pre-trained models within models directory (str)
:param add_keywords: str containing additional training keywords to append to each line in the script
:param train_all: if group_name is specified, set this keyword to train all classes regardeless of whether a trained model exists (bool)
+ :param period_suffix: suffix of period/Fourier features to use for training (str)
:return:
- :examples: ./scope.py create_training_script --filename='train_dnn.sh' --algorithm='dnn' --min_count=1000 \
- --path_dataset='tools/fritzDownload/merged_classifications_features.parquet' --add_keywords='--save --plot --group=groupname'
+ :examples: create-training-script --filename train_dnn.sh --algorithm dnn --min-count 1000 \
+ --path-dataset tools/fritzDownload/merged_classifications_features.parquet --add-keywords '--save --plot --group groupname'
- ./scope.py create_training_script --filename='train_xgb.sh' --algorithm='xgb' --min_count=100 \
- --add_keywords='--save --plot --batch_size=32 --group=groupname'
+ create-training-script --filename train_xgb.sh --algorithm xgb --min-count 100 \
+ --add-keywords '--save --plot --batch-size 32 --group groupname'
"""
path = str(self.base_path / filename)
phenom_tags = []
ontol_tags = []
- period_suffix = kwargs.get(
- 'period_suffix', self.config['features']['info']['period_suffix']
- )
+ if period_suffix is None:
+ period_suffix = self.config["features"]["info"]["period_suffix"]
if path_dataset is None:
- dataset_name = self.config['training']['dataset']
+ dataset_name = self.config["training"]["dataset"]
path_dataset = str(self.base_path / dataset_name)
- if path_dataset.endswith('.parquet'):
+ if path_dataset.endswith(".parquet"):
dataset = read_parquet(path_dataset)
- elif path_dataset.endswith('.h5'):
+ elif path_dataset.endswith(".h5"):
dataset = read_hdf(path_dataset)
- elif path_dataset.endswith('.csv'):
+ elif path_dataset.endswith(".csv"):
dataset = pd.read_csv(path_dataset)
else:
raise ValueError(
- 'Dataset in config file must end with .parquet, .h5 or .csv'
+ "Dataset in config file must end with .parquet, .h5 or .csv"
)
- with open(path, 'x') as script:
+ with open(path, "x") as script:
- script.write('#!/bin/bash\n')
+ script.write("#!/bin/bash\n")
- for tag in self.config['training']['classes'].keys():
- label = self.config['training']['classes'][tag]['label']
- threshold = self.config['training']['classes'][tag]['threshold']
- branch = self.config['training']['classes'][tag]['features']
+ for tag in self.config["training"]["classes"].keys():
+ label = self.config["training"]["classes"][tag]["label"]
+ threshold = self.config["training"]["classes"][tag]["threshold"]
+ branch = self.config["training"]["classes"][tag]["features"]
num_pos = np.sum(dataset[label] > threshold)
if num_pos > min_count:
print(
- f'Label {label}: {num_pos} positive examples with P > {threshold}'
+ f"Label {label}: {num_pos} positive examples with P > {threshold}"
)
- if branch == 'phenomenological':
+ if branch == "phenomenological":
phenom_tags += [tag]
else:
ontol_tags += [tag]
if pre_trained_group_name is not None:
group_path = (
- pathlib.Path(__file__).parent.absolute()
- / f'models_{algorithm}'
- / pre_trained_group_name
+ self.base_path / f"models_{algorithm}" / pre_trained_group_name
)
gen = os.walk(group_path)
model_tags = [tag[1] for tag in gen]
@@ -1134,112 +1443,228 @@ def create_training_script(
set.intersection(set(ontol_tags), set(model_tags))
)
- script.write('# Phenomenological\n')
+ script.write("# Phenomenological\n")
for tag in phenom_tags:
if tag in phenom_hasmodel:
- tag_file_gen = (group_path / tag).glob('*.h5')
+ tag_file_gen = (group_path / tag).glob("*.h5")
most_recent_file = max(
[file for file in tag_file_gen], key=os.path.getctime
).name
script.writelines(
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --pre_trained_model=models/{pre_trained_group_name}/{tag}/{most_recent_file} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --pre_trained_model models/{pre_trained_group_name}/{tag}/{most_recent_file} --period_suffix {period_suffix} --verbose {add_keywords} \n"
)
elif train_all:
script.writelines(
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --period_suffix {period_suffix} --verbose {add_keywords} \n"
)
- script.write('# Ontological\n')
+ script.write("# Ontological\n")
for tag in ontol_tags:
if tag in ontol_hasmodel:
- tag_file_gen = (group_path / tag).glob('*.h5')
+ tag_file_gen = (group_path / tag).glob("*.h5")
most_recent_file = max(
[file for file in tag_file_gen], key=os.path.getctime
).name
script.writelines(
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --pre_trained_model=models/{pre_trained_group_name}/{tag}/{most_recent_file} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --pre_trained_model models/{pre_trained_group_name}/{tag}/{most_recent_file} --period_suffix {period_suffix} --verbose {add_keywords} \n"
)
elif train_all:
script.writelines(
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --period_suffix {period_suffix} --verbose {add_keywords} \n"
)
else:
- script.write('# Phenomenological\n')
+ script.write("# Phenomenological\n")
script.writelines(
[
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --period_suffix {period_suffix} --verbose {add_keywords} \n"
for tag in phenom_tags
]
)
- script.write('# Ontological\n')
+ script.write("# Ontological\n")
script.writelines(
[
- f'./scope.py train --tag={tag} --algorithm={algorithm} --path_dataset={path_dataset} --period_suffix={period_suffix} --verbose {add_keywords} \n'
+ f"scope-train --tag {tag} --algorithm {algorithm} --path_dataset {path_dataset} --period_suffix {period_suffix} --verbose {add_keywords} \n"
for tag in ontol_tags
]
)
+ print(f"Wrote training script to {path}.")
+
+ def parse_run_assemble_training_stats(self):
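+ """Parse command-line arguments for assemble-training-stats and run assemble_training_stats."""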
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--group-name",
+ type=str,
+ default="experiment",
+ help="trained model group name",
+ )
+ parser.add_argument(
+ "--algorithm",
+ type=str,
+ default="dnn",
+ help="name of ML algorithm",
+ )
+ parser.add_argument(
+ "--set-name",
+ type=str,
+ default="val",
+ help="one of train, val or test",
+ )
+ parser.add_argument(
+ "--importance-directory",
+ type=str,
+ default="xgb_feature_importance",
+ help="name of directory to save XGB feature importance",
+ )
+ parser.add_argument(
+ "--stats-directory",
+ type=str,
+ default="stats",
+ help="name of directory to save training stats",
+ )
+
+ args, _ = parser.parse_known_args()
+ self.assemble_training_stats(**vars(args))
def assemble_training_stats(
self,
- group_name: str = 'experiment',
- algorithm: str = 'dnn',
- set_name: str = 'val',
- importance_directory: str = 'xgb_feature_importance',
- stats_directory: str = 'stats',
+ group_name: str = "experiment",
+ algorithm: str = "dnn",
+ set_name: str = "val",
+ importance_directory: str = "xgb_feature_importance",
+ stats_directory: str = "stats",
):
+ """
+ Assemble training stats from individual class results
+
+ :param group_name: trained model group name (str)
+ :param algorithm: name of ML algorithm (str)
+ :param set_name: one of train, val or test (str)
+ :param importance_directory: name of directory to save XGB feature importance (str)
+ :param stats_directory: name of directory to save training stats (str)
+
+ :return:
+
+ :example: assemble-training-stats --group-name DR16 --algorithm xgb --set-name test \
+ --importance-directory xgb_importance --stats-directory xgb_stats
+ """
base_path = self.base_path
- group_path = base_path / f'models_{algorithm}' / group_name
+ group_path = base_path / f"models_{algorithm}" / group_name
- if algorithm in ['xgb', 'xgboost', 'XGB', 'XGBoost']:
+ if algorithm in ["xgb", "xgboost", "XGB", "XGBoost"]:
importance_path = base_path / importance_directory
importance_path.mkdir(exist_ok=True)
# XGB feature importance
- labels = [x for x in group_path.iterdir() if x.name != '.DS_Store']
+ labels = [x for x in group_path.iterdir() if x.name != ".DS_Store"]
statpaths = []
for label in labels:
statpaths.append(
- [x for x in label.glob(f'*plots/{set_name}/*impvars.json')][0]
+ [x for x in label.glob(f"*plots/{set_name}/*impvars.json")][0]
)
for statpath in statpaths:
strpath = str(statpath)
- os.system(f'cp {strpath} {importance_path}/.')
+ os.system(f"cp {strpath} {importance_path}/.")
# DNN/XGB stats
stats_path = base_path / f"{algorithm}_{stats_directory}"
stats_path.mkdir(exist_ok=True)
- labels = [x for x in group_path.iterdir() if x.name != '.DS_Store']
+ labels = [x for x in group_path.iterdir() if x.name != ".DS_Store"]
statpaths = []
for label in labels:
statpaths.append(
- [x for x in label.glob(f'*plots/{set_name}/*stats.json')][0]
+ [x for x in label.glob(f"*plots/{set_name}/*stats.json")][0]
)
for statpath in statpaths:
strpath = str(statpath)
- os.system(f'cp {strpath} {stats_path}/.')
+ os.system(f"cp {strpath} {stats_path}/.")
+
+ print("Finished assembling stats.")
+
+ def parse_run_create_inference_script(self):
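+ """Parse command-line arguments for create-inference-script and run create_inference_script."""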
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--filename",
+ type=str,
+ default="get_all_preds_dnn.sh",
+ help="filename of shell script (must not currently exist)",
+ )
+ parser.add_argument(
+ "--group-name",
+ type=str,
+ default="experiment",
+ help="name of group containing trained models within models directory",
+ )
+ parser.add_argument(
+ "--algorithm",
+ type=str,
+ default="dnn",
+ help="algorithm to use in script",
+ )
+ parser.add_argument(
+ "--scale-features",
+ type=str,
+ default="min_max",
+ help="method to scale features (currently 'min_max' or 'median_std')",
+ )
+ parser.add_argument(
+ "--feature-directory",
+ type=str,
+ default="features",
+ help="name of directory containing downloaded or generated features",
+ )
+ parser.add_argument(
+ "--write-csv",
+ action="store_true",
+ help="if set, write CSV file in addition to parquet",
+ )
+ parser.add_argument(
+ "--batch-size",
+ type=int,
+ default=100000,
+ help="batch size to use when reading feature files",
+ )
+ parser.add_argument(
+ "--use-custom-python",
+ action="store_true",
+ help="if True, the call to run-inference will be preceded by a specific path to python",
+ )
+ parser.add_argument(
+ "--path-to-python",
+ type=str,
+ default="~/miniforge3/envs/scope-env/bin/python",
+ help="if --use-custom-python is set (e.g. for a cron job), path to custom python installation",
+ )
+ parser.add_argument(
+ "--period-suffix",
+ type=str,
+ help="suffix of period/Fourier features to use for inference",
+ )
+
+ args, _ = parser.parse_known_args()
+ self.create_inference_script(**vars(args))
def create_inference_script(
self,
- filename: str = 'get_all_preds_dnn.sh',
- group_name: str = 'experiment',
- algorithm: str = 'dnn',
- scale_features: str = 'min_max',
- feature_directory: str = 'features',
+ filename: str = "get_all_preds_dnn.sh",
+ group_name: str = "experiment",
+ algorithm: str = "dnn",
+ scale_features: str = "min_max",
+ feature_directory: str = "features",
write_csv: bool = False,
batch_size: int = 100000,
use_custom_python: bool = False,
- path_to_python: str = '~/miniforge3/envs/scope-env/bin/python',
- **kwargs,
+ path_to_python: str = "~/miniforge3/envs/scope-env/bin/python",
+ period_suffix: str = None,
):
"""
- Create inference shell script
+ Save shell script to use when running inference
:param filename: filename of shell script (must not currently exist) (str)
:param group_name: name of group containing trained models within models directory (str)
@@ -1248,84 +1673,84 @@ def create_inference_script(
:param feature_directory: name of directory containing downloaded or generated features (str)
:param write_csv: if True, write CSV file in addition to parquet (bool)
:param batch_size: batch size to use when reading feature files (int)
- :param use_custom_python: if True, the call to inference.py will be preceded by a specific path to python (bool)
+ :param use_custom_python: if True, the call to run-inference will be preceded by a specific path to python (bool)
:param path_to_python: if use_custom_python is set (e.g. for a cron job), path to custom python installation (str)
+ :param period_suffix: suffix of period/Fourier features to use for inference (str)
:return:
- Saves shell script to use when running inference
- :example: ./scope.py create_inference_script --filename='get_all_preds_dnn.sh' --group_name='experiment' \
- --algorithm='dnn' --feature_directory='generated_features'
+ :example: create-inference-script --filename get_all_preds_dnn.sh --group-name experiment \
+ --algorithm dnn --feature-directory generated_features
"""
-
base_path = self.base_path
path = str(base_path / filename)
- group_path = base_path / f'models_{algorithm}' / group_name
+ group_path = base_path / f"models_{algorithm}" / group_name
- addtl_args = ''
+ addtl_args = ""
if write_csv:
- addtl_args += '--write_csv'
+ addtl_args += "--write-csv"
gen = os.walk(group_path)
model_tags = [tag[1] for tag in gen]
model_tags = model_tags[0]
- period_suffix = kwargs.get(
- 'period_suffix', self.config['features']['info']['period_suffix']
- )
+ if period_suffix is None:
+ period_suffix = self.config["features"]["info"]["period_suffix"]
if not use_custom_python:
- path_to_python = ''
+ path_to_python = ""
- with open(path, 'x') as script:
- script.write('#!/bin/bash\n')
+ with open(path, "x") as script:
+ script.write("#!/bin/bash\n")
script.write(
- '# Call script followed by field number, e.g: ./get_all_preds_dnn.sh 301\n'
+ "# Call script followed by field number, e.g: ./get_all_preds_dnn.sh 301\n"
)
- paths_models_str = ''
- model_class_names_str = ''
+ paths_models_str = ""
+ model_class_names_str = ""
- if algorithm in ['dnn', 'DNN', 'nn', 'NN']:
- algorithm = 'dnn'
+ if algorithm in ["dnn", "DNN", "nn", "NN"]:
+ algorithm = "dnn"
script.write('echo "dnn inference"\n')
# Select most recent model for each tag
for tag in model_tags:
- tag_file_gen = (group_path / tag).glob('*.h5')
+ tag_file_gen = (group_path / tag).glob("*.h5")
most_recent_file = max(
[file for file in tag_file_gen], key=os.path.getctime
).name
- paths_models_str += f'{str(base_path)}/models_{algorithm}/{group_name}/{tag}/{most_recent_file} '
- model_class_names_str += f'{tag} '
+ paths_models_str += f"{str(base_path)}/models_{algorithm}/{group_name}/{tag}/{most_recent_file} "
+ model_class_names_str += f"{tag} "
script.write(
- f'echo -n "Running inference..." && {path_to_python} {str(base_path)}/tools/inference.py --paths_models {paths_models_str} --model_class_names {model_class_names_str} --field $1 --whole_field --flag_ids --scale_features {scale_features} --feature_directory {feature_directory} --period_suffix {period_suffix} --batch_size {batch_size} {addtl_args} && echo "done"\n'
+ f'echo -n "Running inference..." && {path_to_python} run-inference --paths-models {paths_models_str} --model-class-names {model_class_names_str} --field $1 --whole-field --flag-ids --scale-features {scale_features} --feature-directory {feature_directory} --period-suffix {period_suffix} --batch-size {batch_size} {addtl_args} && echo "done"\n'
)
- elif algorithm in ['XGB', 'xgb', 'XGBoost', 'xgboost', 'XGBOOST']:
- algorithm = 'xgb'
+ elif algorithm in ["XGB", "xgb", "XGBoost", "xgboost", "XGBOOST"]:
+ algorithm = "xgb"
script.write('echo "xgb inference"\n')
for tag in model_tags:
- tag_file_gen = (group_path / tag).glob('*.json')
+ tag_file_gen = (group_path / tag).glob("*.json")
most_recent_file = max(
[file for file in tag_file_gen], key=os.path.getctime
).name
- paths_models_str += f'{str(base_path)}/models_{algorithm}/{group_name}/{tag}/{most_recent_file} '
- model_class_names_str += f'{tag} '
+ paths_models_str += f"{str(base_path)}/models_{algorithm}/{group_name}/{tag}/{most_recent_file} "
+ model_class_names_str += f"{tag} "
script.write(
- f'echo -n "Running inference..." && {path_to_python} {str(base_path)}/tools/inference.py --paths_models {paths_models_str} --model_class_names {model_class_names_str} --scale_features {scale_features} --feature_directory {feature_directory} --period_suffix {period_suffix} --batch_size {batch_size} --xgb_model --field $1 --whole_field --flag_ids {addtl_args} && echo "done"\n'
+ f'echo -n "Running inference..." && {path_to_python} run-inference --paths-models {paths_models_str} --model-class-names {model_class_names_str} --scale-features {scale_features} --feature-directory {feature_directory} --period-suffix {period_suffix} --batch-size {batch_size} --xgb-model --field $1 --whole-field --flag-ids {addtl_args} && echo "done"\n'
)
else:
- raise ValueError('algorithm must be dnn or xgb')
+ raise ValueError("algorithm must be dnn or xgb")
+
+ print(f"Wrote inference script to {path}")
def consolidate_inference_results(
self,
dataset: pd.DataFrame,
- statistic: str = 'mean',
+ statistic: str = "mean",
):
"""
Consolidate inference results from multiple rows to one per source (called in select_fritz_sample)
@@ -1340,147 +1765,147 @@ def consolidate_inference_results(
# Begin with Gaia EDR3 ID
# If no Gaia ID, use AllWISE
# If no AllWISE, use PS1
- withGaiaID = dataset[dataset['Gaia_EDR3___id'] != 0].reset_index(drop=True)
- nanGaiaID = dataset[dataset['Gaia_EDR3___id'] == 0].reset_index(drop=True)
+ withGaiaID = dataset[dataset["Gaia_EDR3___id"] != 0].reset_index(drop=True)
+ nanGaiaID = dataset[dataset["Gaia_EDR3___id"] == 0].reset_index(drop=True)
- withAllWiseID = nanGaiaID[nanGaiaID['AllWISE___id'] != 0].reset_index(drop=True)
- nanAllWiseID = nanGaiaID[nanGaiaID['AllWISE___id'] == 0].reset_index(drop=True)
+ withAllWiseID = nanGaiaID[nanGaiaID["AllWISE___id"] != 0].reset_index(drop=True)
+ nanAllWiseID = nanGaiaID[nanGaiaID["AllWISE___id"] == 0].reset_index(drop=True)
- withPS1ID = nanAllWiseID[nanAllWiseID['PS1_DR1___id'] != 0].reset_index(
+ withPS1ID = nanAllWiseID[nanAllWiseID["PS1_DR1___id"] != 0].reset_index(
drop=True
)
# Define columns for each subset that should not be averaged or otherwise aggregated
- skipList = ['Gaia_EDR3___id', 'AllWISE___id', 'PS1_DR1___id', '_id']
+ skipList = ["Gaia_EDR3___id", "AllWISE___id", "PS1_DR1___id", "_id"]
skip_mean_cols_Gaia = withGaiaID[skipList]
skip_mean_cols_AllWise = withAllWiseID[skipList]
skip_mean_cols_PS1 = withPS1ID[skipList]
if statistic in [
- 'mean',
- 'Mean',
- 'MEAN',
- 'average',
- 'AVERAGE',
- 'Average',
- 'avg',
- 'AVG',
+ "mean",
+ "Mean",
+ "MEAN",
+ "average",
+ "AVERAGE",
+ "Average",
+ "avg",
+ "AVG",
]:
groupedMeans_Gaia = (
- withGaiaID.groupby('Gaia_EDR3___id')
+ withGaiaID.groupby("Gaia_EDR3___id")
.mean()
- .drop(['_id', 'AllWISE___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "AllWISE___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_AllWise = (
- withAllWiseID.groupby('AllWISE___id')
+ withAllWiseID.groupby("AllWISE___id")
.mean()
- .drop(['_id', 'Gaia_EDR3___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_PS1 = (
- withPS1ID.groupby('PS1_DR1___id')
+ withPS1ID.groupby("PS1_DR1___id")
.mean()
- .drop(['_id', 'Gaia_EDR3___id', 'AllWISE___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "AllWISE___id"], axis=1)
.reset_index()
)
- elif statistic in ['max', 'Max', 'MAX', 'maximum', 'Maximum', 'MAXIMUM']:
+ elif statistic in ["max", "Max", "MAX", "maximum", "Maximum", "MAXIMUM"]:
groupedMeans_Gaia = (
- withGaiaID.groupby('Gaia_EDR3___id')
+ withGaiaID.groupby("Gaia_EDR3___id")
.max()
- .drop(['_id', 'AllWISE___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "AllWISE___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_AllWise = (
- withAllWiseID.groupby('AllWISE___id')
+ withAllWiseID.groupby("AllWISE___id")
.max()
- .drop(['_id', 'Gaia_EDR3___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_PS1 = (
- withPS1ID.groupby('PS1_DR1___id')
+ withPS1ID.groupby("PS1_DR1___id")
.max()
- .drop(['_id', 'Gaia_EDR3___id', 'AllWISE___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "AllWISE___id"], axis=1)
.reset_index()
)
- elif statistic in ['median', 'Median', 'MEDIAN', 'med', 'MED']:
+ elif statistic in ["median", "Median", "MEDIAN", "med", "MED"]:
groupedMeans_Gaia = (
- withGaiaID.groupby('Gaia_EDR3___id')
+ withGaiaID.groupby("Gaia_EDR3___id")
.median()
- .drop(['_id', 'AllWISE___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "AllWISE___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_AllWise = (
- withAllWiseID.groupby('AllWISE___id')
+ withAllWiseID.groupby("AllWISE___id")
.median()
- .drop(['_id', 'Gaia_EDR3___id', 'PS1_DR1___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "PS1_DR1___id"], axis=1)
.reset_index()
)
groupedMeans_PS1 = (
- withPS1ID.groupby('PS1_DR1___id')
+ withPS1ID.groupby("PS1_DR1___id")
.median()
- .drop(['_id', 'Gaia_EDR3___id', 'AllWISE___id'], axis=1)
+ .drop(["_id", "Gaia_EDR3___id", "AllWISE___id"], axis=1)
.reset_index()
)
else:
raise ValueError(
- 'Mean, median and max are the currently supported statistics.'
+ "Mean, median and max are the currently supported statistics."
)
# Construct new survey_id column that contains the ID used to add grouped source to the list
- string_ids_Gaia = groupedMeans_Gaia['Gaia_EDR3___id'].astype(str)
- groupedMeans_Gaia['survey_id'] = ["Gaia_EDR3___" + s for s in string_ids_Gaia]
+ string_ids_Gaia = groupedMeans_Gaia["Gaia_EDR3___id"].astype(str)
+ groupedMeans_Gaia["survey_id"] = ["Gaia_EDR3___" + s for s in string_ids_Gaia]
- string_ids_AllWise = groupedMeans_AllWise['AllWISE___id'].astype(str)
- groupedMeans_AllWise['survey_id'] = [
+ string_ids_AllWise = groupedMeans_AllWise["AllWISE___id"].astype(str)
+ groupedMeans_AllWise["survey_id"] = [
"AllWISE___" + s for s in string_ids_AllWise
]
- string_ids_PS1 = groupedMeans_PS1['PS1_DR1___id'].astype(str)
- groupedMeans_PS1['survey_id'] = ["PS1_DR1___" + s for s in string_ids_PS1]
+ string_ids_PS1 = groupedMeans_PS1["PS1_DR1___id"].astype(str)
+ groupedMeans_PS1["survey_id"] = ["PS1_DR1___" + s for s in string_ids_PS1]
# Merge averaged, non-averaged columns on obj_id
allRows_Gaia = pd.merge(
- groupedMeans_Gaia, skip_mean_cols_Gaia, on=['Gaia_EDR3___id']
+ groupedMeans_Gaia, skip_mean_cols_Gaia, on=["Gaia_EDR3___id"]
)
- noDup_ids_Gaia = allRows_Gaia.drop_duplicates('Gaia_EDR3___id')[
- ['Gaia_EDR3___id', '_id']
+ noDup_ids_Gaia = allRows_Gaia.drop_duplicates("Gaia_EDR3___id")[
+ ["Gaia_EDR3___id", "_id"]
]
groupedMeans_Gaia = pd.merge(
- groupedMeans_Gaia, noDup_ids_Gaia, on='Gaia_EDR3___id'
+ groupedMeans_Gaia, noDup_ids_Gaia, on="Gaia_EDR3___id"
)
- groupedMeans_Gaia.drop('Gaia_EDR3___id', axis=1, inplace=True)
+ groupedMeans_Gaia.drop("Gaia_EDR3___id", axis=1, inplace=True)
allRows_AllWise = pd.merge(
- groupedMeans_AllWise, skip_mean_cols_AllWise, on=['AllWISE___id']
+ groupedMeans_AllWise, skip_mean_cols_AllWise, on=["AllWISE___id"]
)
- noDup_ids_AllWise = allRows_AllWise.drop_duplicates('AllWISE___id')[
- ['AllWISE___id', '_id']
+ noDup_ids_AllWise = allRows_AllWise.drop_duplicates("AllWISE___id")[
+ ["AllWISE___id", "_id"]
]
groupedMeans_AllWise = pd.merge(
- groupedMeans_AllWise, noDup_ids_AllWise, on='AllWISE___id'
+ groupedMeans_AllWise, noDup_ids_AllWise, on="AllWISE___id"
)
- groupedMeans_AllWise.drop('AllWISE___id', axis=1, inplace=True)
+ groupedMeans_AllWise.drop("AllWISE___id", axis=1, inplace=True)
allRows_PS1 = pd.merge(
- groupedMeans_PS1, skip_mean_cols_PS1, on=['PS1_DR1___id']
+ groupedMeans_PS1, skip_mean_cols_PS1, on=["PS1_DR1___id"]
)
- noDup_ids_PS1 = allRows_PS1.drop_duplicates('PS1_DR1___id')[
- ['PS1_DR1___id', '_id']
+ noDup_ids_PS1 = allRows_PS1.drop_duplicates("PS1_DR1___id")[
+ ["PS1_DR1___id", "_id"]
]
- groupedMeans_PS1 = pd.merge(groupedMeans_PS1, noDup_ids_PS1, on='PS1_DR1___id')
- groupedMeans_PS1.drop('PS1_DR1___id', axis=1, inplace=True)
+ groupedMeans_PS1 = pd.merge(groupedMeans_PS1, noDup_ids_PS1, on="PS1_DR1___id")
+ groupedMeans_PS1.drop("PS1_DR1___id", axis=1, inplace=True)
# Create dataframe with one row per source
consol_rows = pd.concat(
@@ -1490,44 +1915,149 @@ def consolidate_inference_results(
# Create dataframe containing all rows (including duplicates for multiple light curves)
all_rows = pd.concat([allRows_Gaia, allRows_AllWise, allRows_PS1])
all_rows.drop(
- ['Gaia_EDR3___id', 'AllWISE___id', 'PS1_DR1___id'], axis=1, inplace=True
+ ["Gaia_EDR3___id", "AllWISE___id", "PS1_DR1___id"], axis=1, inplace=True
)
# Reorder columns for better legibility
- consol_rows = consol_rows.set_index('survey_id').reset_index()
- all_rows = all_rows.set_index('survey_id').reset_index()
+ consol_rows = consol_rows.set_index("survey_id").reset_index()
+ all_rows = all_rows.set_index("survey_id").reset_index()
return consol_rows, all_rows
+ def parse_run_select_fritz_sample(self):
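+ """Parse command-line arguments for select-fritz-sample and run select_fritz_sample."""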
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--fields",
+ type=str,
+ nargs="+",
+ default=["all"],
+ help="list of field predictions (integers) to include, 'all' to use all available fields, or 'specific_ids' if running on e.g. GCN sources",
+ )
+ parser.add_argument(
+ "--group",
+ type=str,
+ default="experiment",
+ help="name of group containing trained models within models directory",
+ )
+ parser.add_argument(
+ "--min-class-examples",
+ type=int,
+ default=1000,
+ help="minimum number of examples to include for each class. Some classes may contain fewer than this if the sample is limited",
+ )
+ parser.add_argument(
+ "--select-top-n",
+ action="store_true",
+ help="if set, select top N probabilities above probability_threshold from each class",
+ )
+ parser.add_argument(
+ "--include-all-highprob-labels",
+ action="store_true",
+ help="if select_top_n is set, setting this keyword includes any classification above the probability_threshold for all top N sources. Otherwise, literally only the top N probabilities for each classification will be included, which may artificially exclude relevant labels.",
+ )
+ parser.add_argument(
+ "--probability-threshold",
+ type=float,
+ default=0.9,
+ help="minimum probability to select for Fritz",
+ )
+ parser.add_argument(
+ "--al-directory",
+ type=str,
+ default="AL_datasets",
+ help="name of directory to create/populate with Fritz sample",
+ )
+ parser.add_argument(
+ "--al-filename",
+ type=str,
+ default="active_learning_set",
+ help="name of file (no extension) to store Fritz sample",
+ )
+ parser.add_argument(
+ "--algorithm",
+ type=str,
+ default="dnn",
+ help="ML algorithm (dnn or xgb)",
+ )
+ parser.add_argument(
+ "--exclude-training-sources",
+ action="store_true",
+ help="if set, exclude sources in current training set from AL sample",
+ )
+ parser.add_argument(
+ "--write-csv",
+ action="store_true",
+ help="if set, write CSV file in addition to parquet",
+ )
+ parser.add_argument(
+ "--verbose",
+ action="store_true",
+ help="if set, print additional information",
+ )
+ parser.add_argument(
+ "--consolidation-statistic",
+ type=str,
+ default="mean",
+ help="method to combine multiple classification probabilities for a single source ('mean', 'median' or 'max' currently supported)",
+ )
+ parser.add_argument(
+ "--read-consolidation-results",
+ action="store_true",
+ help="if set, search for and read an existing consolidated file having _consol.parquet suffix",
+ )
+ parser.add_argument(
+ "--write-consolidation-results",
+ action="store_true",
+ help="if set, save two files: consolidated inference results [1 row per source] and full results [≥ 1 row per source]",
+ )
+ parser.add_argument(
+ "--consol-filename",
+ type=str,
+ default="inference_results",
+ help="name of file (no extension) to store consolidated and full results",
+ )
+ parser.add_argument(
+ "--doNotSave",
+ action="store_true",
+ help="if set, do not write results",
+ )
+ parser.add_argument(
+ "--doAllSources",
+ action="store_true",
+ help="if set, ignore min_class_examples and run for all sources",
+ )
+
+ args, _ = parser.parse_known_args()
+ self.select_fritz_sample(**vars(args))
+
def select_fritz_sample(
self,
- fields: Union[list, str] = 'all',
- group: str = 'experiment',
+ fields: list = ["all"],
+ group: str = "experiment",
min_class_examples: int = 1000,
select_top_n: bool = False,
include_all_highprob_labels: bool = False,
probability_threshold: float = 0.9,
- al_directory: str = 'AL_datasets',
- al_filename: str = 'active_learning_set',
- algorithm: str = 'dnn',
+ al_directory: str = "AL_datasets",
+ al_filename: str = "active_learning_set",
+ algorithm: str = "dnn",
exclude_training_sources: bool = False,
write_csv: bool = True,
verbose: bool = False,
- consolidation_statistic: str = 'mean',
+ consolidation_statistic: str = "mean",
read_consolidation_results: bool = False,
write_consolidation_results: bool = False,
- consol_filename: str = 'inference_results',
+ consol_filename: str = "inference_results",
doNotSave: bool = False,
doAllSources: bool = False,
):
"""
Select subset of predictions to use for posting to Fritz (active learning, GCN source classifications).
- :param fields: list of field predictions (integers) to include, 'all' to use all available fields, or 'specific_ids' if running on e.g. GCN sources (list or str)
- note: do not use spaces if providing a list of comma-separated integers to this argument.
+ :param fields: list of field predictions (integers) to include, 'all' to use all available fields, or 'specific_ids' if running on e.g. GCN sources (list)
:param group: name of group containing trained models within models directory (str)
:param min_class_examples: minimum number of examples to include for each class. Some classes may contain fewer than this if the sample is limited (int)
- :param select_top_n: if True, select top N probabilities above probability_threshold from each class (bool)
+ :param select_top_n: if set, select top N probabilities above probability_threshold from each class (bool)
:param include_all_highprob_labels: if select_top_n is set, setting this keyword includes any classification above the probability_threshold for all top N sources.
Otherwise, literally only the top N probabilities for each classification will be included, which may artifically exclude relevant labels.
:param probability_threshold: minimum probability to select for Fritz (float)
@@ -1535,11 +2065,11 @@ def select_fritz_sample(
:param al_filename: name of file (no extension) to store Fritz sample (str)
:param algorithm: algorithm [dnn or xgb] (str)
:param exclude_training_sources: if True, exclude sources in current training set from AL sample (bool)
- :param write_csv: if True, write CSV file in addition to parquet (bool)
- :param verbose: if True, print additional information (bool)
+ :param write_csv: if set, write CSV file in addition to parquet (bool)
+ :param verbose: if set, print additional information (bool)
:param consolidation_statistic: method to combine multiple classification probabilities for a single source [mean, median or max currently supported] (str)
- :param read_consolidation_results: if True, search for and read an existing consolidated file having _consol.parquet suffix (bool)
- :param write_consolidation_results: if True, save two files: consolidated inference results [1 row per source] and full results [≥ 1 row per source] (bool)
+ :param read_consolidation_results: if set, search for and read an existing consolidated file having _consol.parquet suffix (bool)
+ :param write_consolidation_results: if set, save two files: consolidated inference results [1 row per source] and full results [≥ 1 row per source] (bool)
:param consol_filename: name of file (no extension) to store consolidated and full results (str)
:param doNotSave: if set, do not write results (bool)
:param doAllSources: if set, ignore min_class_examples and run for all sources (bool)
@@ -1547,62 +2077,62 @@ def select_fritz_sample(
:return:
final_toPost: DataFrame containing sources with high-confidence classifications to post
- :examples: ./scope.py select_fritz_sample --fields=[296,297] --group='experiment' --min_class_examples=1000 --probability_threshold=0.9 --exclude_training_sources --write_consolidation_results
- ./scope.py select_fritz_sample --fields=[296,297] --group='experiment' --min_class_examples=500 --select_top_n --include_all_highprob_labels --probability_threshold=0.7 --exclude_training_sources --read_consolidation_results
- ./scope.py select_fritz_sample --fields='specific_ids' --group='DR16' --algorithm='xgb' --probability_threshold=0.9 --consol_filename='inference_results_specific_ids' --al_directory='GCN' --al_filename='GCN_sources' --write_consolidation_results --select_top_n --doAllSources --write_csv
+ :examples: select-fritz-sample --fields 296 297 --group experiment --min-class-examples 1000 --probability-threshold 0.9 --exclude-training-sources --write-consolidation-results
+ select-fritz-sample --fields 296 297 --group experiment --min-class-examples 500 --select-top-n --include-all-highprob-labels --probability-threshold 0.7 --exclude-training-sources --read-consolidation-results
+ select-fritz-sample --fields specific_ids --group DR16 --algorithm xgb --probability-threshold 0.9 --consol-filename inference_results_specific_ids --al-directory=GCN --al-filename GCN_sources --write-consolidation-results --select-top-n --doAllSources --write-csv
"""
base_path = self.base_path
- if algorithm in ['DNN', 'NN', 'dnn', 'nn']:
- algorithm = 'dnn'
- elif algorithm in ['XGB', 'xgb', 'XGBoost', 'xgboost', 'XGBOOST']:
- algorithm = 'xgb'
+ if algorithm in ["DNN", "NN", "dnn", "nn"]:
+ algorithm = "dnn"
+ elif algorithm in ["XGB", "xgb", "XGBoost", "xgboost", "XGBOOST"]:
+ algorithm = "xgb"
else:
- raise ValueError('Algorithm must be either dnn or xgb.')
+ raise ValueError("Algorithm must be either dnn or xgb.")
- preds_path = base_path / f'preds_{algorithm}'
+ preds_path = base_path / f"preds_{algorithm}"
# Strip extension from filename if provided
- al_filename = al_filename.split('.')[0]
- AL_directory_path = str(base_path / f'{al_directory}_{algorithm}' / al_filename)
+ al_filename = al_filename.split(".")[0]
+ AL_directory_path = str(base_path / f"{al_directory}_{algorithm}" / al_filename)
os.makedirs(AL_directory_path, exist_ok=True)
df_coll = []
df_coll_allRows = []
- if fields in ['all', 'All', 'ALL']:
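+ # fields is a list of strings from argparse, e.g. ['296', '297'], ['all'], or ['specific_ids']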
+ if "all" in fields:
gen_fields = os.walk(preds_path)
fields = [x for x in gen_fields][0][1]
- print(f'Generating Fritz sample from {len(fields)} fields:')
- elif 'specific_ids' in fields:
- fields = [f'field_{fields}']
- print('Generating Fritz sample from specific ids across multiple fields:')
+ print(f"Generating Fritz sample from {len(fields)} fields:")
+ elif "specific_ids" in fields:
+ fields = ["field_specific_ids"]
+ print("Generating Fritz sample from specific ids across multiple fields:")
else:
- fields = [f'field_{f}' for f in fields]
- print(f'Generating Fritz sample from {len(fields)} fields:')
+ fields = [f"field_{f}" for f in fields]
+ print(f"Generating Fritz sample from {len(fields)} fields:")
column_nums = []
AL_directory_PL = pathlib.Path(AL_directory_path)
- gen = AL_directory_PL.glob(f'{consol_filename}_consol.parquet')
+ gen = AL_directory_PL.glob(f"{consol_filename}_consol.parquet")
existing_consol_files = [str(x) for x in gen]
if (read_consolidation_results) & (len(existing_consol_files) > 0):
- print('Loading existing consolidated results...')
+ print("Loading existing consolidated results...")
preds_df = read_parquet(existing_consol_files[0])
else:
- print('Consolidating classification probabilities to one per source...')
+ print("Consolidating classification probabilities to one per source...")
for field in fields:
print(field)
- h = read_parquet(str(preds_path / field / f'{field}.parquet'))
+ h = read_parquet(str(preds_path / field / f"{field}.parquet"))
has_obj_id = False
- if 'obj_id' in h.columns:
+ if "obj_id" in h.columns:
has_obj_id = True
id_mapper = (
- h[['_id', 'obj_id']].set_index('_id').to_dict(orient='index')
+ h[["_id", "obj_id"]].set_index("_id").to_dict(orient="index")
)
- h.drop('obj_id', axis=1, inplace=True)
+ h.drop("obj_id", axis=1, inplace=True)
consolidated_df, all_rows_df = self.consolidate_inference_results(
h, statistic=consolidation_statistic
@@ -1619,20 +2149,20 @@ def select_fritz_sample(
if len(np.unique(column_nums)) > 1:
raise ValueError(
- 'Not all predictions have the same number of columns.'
+ "Not all predictions have the same number of columns."
)
# Create consolidated dataframe (one row per source)
preds_df = pd.concat(df_coll, axis=0)
cols = [x for x in preds_df.columns]
- cols.remove('_id')
- cols.remove('survey_id')
- agg_dct = {c: 'mean' for c in cols}
+ cols.remove("_id")
+ cols.remove("survey_id")
+ agg_dct = {c: "mean" for c in cols}
# One more groupby to combine sources across multiple fields
preds_df = (
- preds_df.groupby(['survey_id', '_id']).agg(agg_dct).reset_index()
+ preds_df.groupby(["survey_id", "_id"]).agg(agg_dct).reset_index()
)
# Create dataframe including all light curves (multiple rows per source)
@@ -1640,28 +2170,28 @@ def select_fritz_sample(
if not has_obj_id:
# Generate position-based obj_ids for Fritz
- raArr = [ra for ra in preds_df['ra']]
- decArr = [dec for dec in preds_df['dec']]
+ raArr = [ra for ra in preds_df["ra"]]
+ decArr = [dec for dec in preds_df["dec"]]
obj_ids = [radec_to_iau_name(x, y) for x, y in zip(raArr, decArr)]
else:
obj_ids = []
- for ID in preds_df['_id']:
- obj_ids += [id_mapper[ID]['obj_id']]
+ for ID in preds_df["_id"]:
+ obj_ids += [id_mapper[ID]["obj_id"]]
- preds_df['obj_id'] = obj_ids
+ preds_df["obj_id"] = obj_ids
# Assign obj_ids to all rows
preds_df_allRows = pd.merge(
- preds_df_allRows, preds_df[['obj_id', 'survey_id']], on='survey_id'
+ preds_df_allRows, preds_df[["obj_id", "survey_id"]], on="survey_id"
)
# Drop sources which are so close that they cannot be resolved by our position-based ID (~0.0004 of sources)
preds_df_allRows = (
- preds_df_allRows.set_index('obj_id')
- .drop(preds_df[preds_df.duplicated('obj_id')]['obj_id'])
+ preds_df_allRows.set_index("obj_id")
+ .drop(preds_df[preds_df.duplicated("obj_id")]["obj_id"])
.reset_index()
)
- preds_df = preds_df.drop_duplicates('obj_id', keep=False).reset_index(
+ preds_df = preds_df.drop_duplicates("obj_id", keep=False).reset_index(
drop=True
)
@@ -1669,40 +2199,40 @@ def select_fritz_sample(
if write_consolidation_results:
write_parquet(
preds_df,
- f'{AL_directory_path}/{consol_filename}_consol.parquet',
+ f"{AL_directory_path}/{consol_filename}_consol.parquet",
)
write_parquet(
preds_df_allRows,
- f'{AL_directory_path}/{consol_filename}_full.parquet',
+ f"{AL_directory_path}/{consol_filename}_full.parquet",
)
if write_csv:
preds_df.to_csv(
- f'{AL_directory_path}/{consol_filename}_consol.csv',
+ f"{AL_directory_path}/{consol_filename}_consol.csv",
index=False,
)
preds_df_allRows.to_csv(
- f'{AL_directory_path}/{consol_filename}_full.csv',
+ f"{AL_directory_path}/{consol_filename}_full.csv",
index=False,
)
# Define non-variable class as 1 - variable
include_nonvar = False
- if f'vnv_{algorithm}' in preds_df.columns:
+ if f"vnv_{algorithm}" in preds_df.columns:
include_nonvar = True
- preds_df[f'nonvar_{algorithm}'] = np.round(
- 1 - preds_df[f'vnv_{algorithm}'], 2
+ preds_df[f"nonvar_{algorithm}"] = np.round(
+ 1 - preds_df[f"vnv_{algorithm}"], 2
)
if exclude_training_sources:
# Get training set from config file
- training_set_config = self.config['training']['dataset']
+ training_set_config = self.config["training"]["dataset"]
training_set_path = str(base_path / training_set_config)
- if training_set_path.endswith('.parquet'):
+ if training_set_path.endswith(".parquet"):
training_set = read_parquet(training_set_path)
- elif training_set_path.endswith('.h5'):
+ elif training_set_path.endswith(".h5"):
training_set = read_hdf(training_set_path)
- elif training_set_path.endswith('.csv'):
+ elif training_set_path.endswith(".csv"):
training_set = pd.read_csv(training_set_path)
else:
raise ValueError(
@@ -1710,25 +2240,25 @@ def select_fritz_sample(
)
intersec = set.intersection(
- set(preds_df['obj_id'].values), set(training_set['obj_id'].values)
+ set(preds_df["obj_id"].values), set(training_set["obj_id"].values)
)
- print(f'Dropping {len(intersec)} sources already in training set...')
- preds_df = preds_df.set_index('obj_id').drop(list(intersec)).reset_index()
+ print(f"Dropping {len(intersec)} sources already in training set...")
+ preds_df = preds_df.set_index("obj_id").drop(list(intersec)).reset_index()
# Use trained model names to establish classes to train
- gen = os.walk(base_path / f'models_{algorithm}' / group)
+ gen = os.walk(base_path / f"models_{algorithm}" / group)
model_tags = [tag[1] for tag in gen]
model_tags = model_tags[0]
model_tags = np.array(model_tags)
if include_nonvar:
- model_tags = np.concatenate([model_tags, ['nonvar']])
+ model_tags = np.concatenate([model_tags, ["nonvar"]])
- print(f'Selecting AL sample for {len(model_tags)} classes...')
+ print(f"Selecting AL sample for {len(model_tags)} classes...")
toPost_df = pd.DataFrame(columns=preds_df.columns)
completed_dict = {}
- preds_df.set_index('obj_id', inplace=True)
- toPost_df.set_index('obj_id', inplace=True)
+ preds_df.set_index("obj_id", inplace=True)
+ toPost_df.set_index("obj_id", inplace=True)
# Fix random state to allow reproducible results
rng = np.random.RandomState(9)
@@ -1736,17 +2266,17 @@ def select_fritz_sample(
# Reset min_class_examples if doAllSources is set
if doAllSources:
min_class_examples = len(preds_df)
- print(f'Selecting sample from all sources ({min_class_examples})')
+ print(f"Selecting sample from all sources ({min_class_examples})")
if not select_top_n:
for tag in model_tags:
# Identify all sources above probability threshold
highprob_preds = preds_df[
- preds_df[f'{tag}_{algorithm}'].values >= probability_threshold
+ preds_df[f"{tag}_{algorithm}"].values >= probability_threshold
]
# Find existing sources in AL sample above probability threshold
existing_df = toPost_df[
- toPost_df[f'{tag}_{algorithm}'].values >= probability_threshold
+ toPost_df[f"{tag}_{algorithm}"].values >= probability_threshold
]
existing_count = len(existing_df)
@@ -1767,21 +2297,21 @@ def select_fritz_sample(
concat_toPost_df = highprob_preds
toPost_df = pd.concat([toPost_df, concat_toPost_df], axis=0)
- toPost_df.drop_duplicates(keep='first', inplace=True)
+ toPost_df.drop_duplicates(keep="first", inplace=True)
else:
# Select top N classifications above probability threshold for all classes
print(
- f'Selecting top {min_class_examples} classifications above P = {probability_threshold}...'
+ f"Selecting top {min_class_examples} classifications above P = {probability_threshold}..."
)
preds_df.reset_index(inplace=True)
topN_df = pd.DataFrame()
- class_list = [f'{t}_{algorithm}' for t in model_tags]
+ class_list = [f"{t}_{algorithm}" for t in model_tags]
for tag in model_tags:
goodprob_preds = preds_df[
- preds_df[f'{tag}_{algorithm}'].values >= probability_threshold
+ preds_df[f"{tag}_{algorithm}"].values >= probability_threshold
]
if not include_all_highprob_labels:
@@ -1789,15 +2319,15 @@ def select_fritz_sample(
topN_preds = (
goodprob_preds[
[
- 'obj_id',
- 'survey_id',
- 'ra',
- 'dec',
- 'period',
- f'{tag}_{algorithm}',
+ "obj_id",
+ "survey_id",
+ "ra",
+ "dec",
+ "period",
+ f"{tag}_{algorithm}",
]
]
- .sort_values(by=f'{tag}_{algorithm}', ascending=False)
+ .sort_values(by=f"{tag}_{algorithm}", ascending=False)
.iloc[:min_class_examples]
.reset_index(drop=True)
)
@@ -1806,7 +2336,7 @@ def select_fritz_sample(
# Include not only the top N probabilities for each class but also any other classifications above probability_threshold for these sources
topN_preds = (
goodprob_preds.sort_values(
- by=f'{tag}_{algorithm}', ascending=False
+ by=f"{tag}_{algorithm}", ascending=False
)
.iloc[:min_class_examples]
.reset_index(drop=True)
@@ -1820,26 +2350,26 @@ def select_fritz_sample(
topN_df = pd.concat([topN_df, topN_preds]).reset_index(drop=True)
- toPost_df = topN_df.fillna(0.0).groupby('obj_id').max().reset_index()
+ toPost_df = topN_df.fillna(0.0).groupby("obj_id").max().reset_index()
for tag in model_tags:
# Make metadata dictionary of example count per class
- completed_dict[f'{tag}_{algorithm}'] = int(
- np.sum(toPost_df[f'{tag}_{algorithm}'].values >= probability_threshold)
+ completed_dict[f"{tag}_{algorithm}"] = int(
+ np.sum(toPost_df[f"{tag}_{algorithm}"].values >= probability_threshold)
)
final_toPost = toPost_df.reset_index(drop=True)
if not doNotSave:
# Write parquet and csv files
- write_parquet(final_toPost, f'{AL_directory_path}/{al_filename}.parquet')
+ write_parquet(final_toPost, f"{AL_directory_path}/{al_filename}.parquet")
if write_csv:
final_toPost.to_csv(
- f'{AL_directory_path}/{al_filename}.csv', index=False
+ f"{AL_directory_path}/{al_filename}.csv", index=False
)
# Write metadata
- meta_filepath = f'{AL_directory_path}/meta.json'
+ meta_filepath = f"{AL_directory_path}/meta.json"
with open(meta_filepath, "w") as f:
try:
json.dump(completed_dict, f) # dump dictionary to a json file
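
For reference, the per-class top-N selections concatenated into `topN_df` above are collapsed to one row per source with `fillna(0.0).groupby("obj_id").max()`. A toy sketch of that consolidation (the `*_xgb` column names are illustrative):

```python
import pandas as pd

# Two per-class selections that share a source ("b")
topN_a = pd.DataFrame({"obj_id": ["a", "b"], "rrlyr_xgb": [0.95, 0.80]})
topN_b = pd.DataFrame({"obj_id": ["b", "c"], "ea_xgb": [0.90, 0.85]})

combined = pd.concat([topN_a, topN_b]).reset_index(drop=True)

# fillna(0.0) fills the classes missing from each selection, and groupby().max()
# keeps the highest probability per class for every source
consolidated = combined.fillna(0.0).groupby("obj_id").max().reset_index()
print(consolidated)
```
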
@@ -1858,14 +2388,14 @@ def test_limited(self):
# create a mock dataset and check that the training pipeline works
dataset = f"{uuid.uuid4().hex}_orig.csv"
- path_mock = pathlib.Path(__file__).parent.absolute() / "data" / "training"
- group_mock = 'scope_test_limited'
+ path_mock = self.base_path / "data" / "training"
+ group_mock = "scope_test_limited"
try:
- with status('Test training'):
+ with status("Test training"):
print()
- period_suffix_config = self.config['features']['info']['period_suffix']
+ period_suffix_config = self.config["features"]["info"]["period_suffix"]
if not path_mock.exists():
path_mock.mkdir(parents=True, exist_ok=True)
@@ -1874,19 +2404,19 @@ def test_limited(self):
feature_names_orig = [
key
for key in all_feature_names
- if forgiving_true(all_feature_names[key]['include'])
+ if forgiving_true(all_feature_names[key]["include"])
]
feature_names = feature_names_orig.copy()
if not (
- (period_suffix_config is None) | (period_suffix_config == 'None')
+ (period_suffix_config is None) | (period_suffix_config == "None")
):
periodic_bool = [
- all_feature_names[x]['periodic'] for x in feature_names
+ all_feature_names[x]["periodic"] for x in feature_names
]
for j, name in enumerate(feature_names):
if periodic_bool[j]:
- feature_names[j] = f'{name}_{period_suffix_config}'
+ feature_names[j] = f"{name}_{period_suffix_config}"
class_names = [
self.config["training"]["classes"][class_name]["label"]
@@ -1912,16 +2442,16 @@ def test_limited(self):
df_mock_orig = pd.DataFrame.from_records(entries)
df_mock_orig.to_csv(path_mock / dataset, index=False)
- algorithms = ['xgb', 'dnn']
+ algorithms = ["xgb", "dnn"]
model_paths = []
# Train twice: once on Kowalski features, once on generated features with different periodic feature names
for algorithm in algorithms:
tag = "vnv"
- if algorithm == 'xgb':
- extension = 'json'
- elif algorithm == 'dnn':
- extension = 'h5'
+ if algorithm == "xgb":
+ extension = "json"
+ elif algorithm == "dnn":
+ extension = "h5"
time_tag = self.train(
tag=tag,
path_dataset=path_mock / dataset,
@@ -1935,7 +2465,7 @@ def test_limited(self):
group=group_mock,
)
path_model = (
- pathlib.Path(__file__).parent.absolute()
+ self.base_path
/ f"models_{algorithm}"
/ group_mock
/ tag
@@ -1943,7 +2473,7 @@ def test_limited(self):
)
model_paths += [path_model]
- print('model_paths', model_paths)
+ print("model_paths", model_paths)
finally:
# clean up after thyself
@@ -1953,6 +2483,16 @@ def test_limited(self):
for path in model_paths:
shutil.rmtree(path.parent.parent)
+ def parse_run_test(self):
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "--doGPU",
+ action="store_true",
+ help="if set, use GPU-accelerated period algorithm",
+ )
+ args, _ = parser.parse_known_args()
+ self.test(**vars(args))
+
def test(self, doGPU=False):
"""
Test different workflows
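
The new `parse_run_test` entry point uses `argparse.ArgumentParser.parse_known_args`, which silently ignores flags intended for other parsers (such as the `--config-path` flag handled in `scope/utils.py`) instead of erroring out. A standalone sketch of that behavior:

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--doGPU", action="store_true", help="use GPU-accelerated period algorithms"
)

# parse_known_args returns (namespace, leftover_args); unknown flags are left
# for another parser rather than raising SystemExit
args, leftover = parser.parse_known_args(["--doGPU", "--config-path", "my_config.yaml"])
print(vars(args))   # {'doGPU': True}
print(leftover)     # ['--config-path', 'my_config.yaml']
```
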
@@ -1967,14 +2507,14 @@ def test(self, doGPU=False):
inference,
combine_preds,
)
- from scope.fritz import get_lightcurves_via_coords
+ from .fritz import get_lightcurves_via_coords
# Test feature generation
with status("Test generate_features"):
print()
test_field, test_ccd, test_quad = 297, 2, 2
- test_feature_directory = 'generated_features'
- test_feature_filename = 'testFeatures'
+ test_feature_directory = "generated_features"
+ test_feature_filename = "testFeatures"
n_sources = 3
_ = generate_features.generate_features(
@@ -1994,12 +2534,23 @@ def test(self, doGPU=False):
doScaleMinPeriod=True,
)
- path_gen_features = (
- pathlib.Path(__file__).parent.absolute()
- / test_feature_directory
- / f"field_{test_field}"
- / f"{test_feature_filename}_field_{test_field}_ccd_{test_ccd}_quad_{test_quad}.parquet"
+ path_to_features = self.config.get("feature_generation").get(
+ "path_to_features"
)
+ if path_to_features is None:
+ path_gen_features = (
+ self.base_path
+ / test_feature_directory
+ / f"field_{test_field}"
+ / f"{test_feature_filename}_field_{test_field}_ccd_{test_ccd}_quad_{test_quad}.parquet"
+ )
+ else:
+ path_gen_features = (
+ pathlib.Path(path_to_features)
+ / test_feature_directory
+ / f"field_{test_field}"
+ / f"{test_feature_filename}_field_{test_field}_ccd_{test_ccd}_quad_{test_quad}.parquet"
+ )
with status("Test get_lightcurves_via_coords"):
print()
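
The branch above resolves the generated-features directory from `feature_generation.path_to_features` when it is set, and otherwise falls back to `self.base_path`. The same lookup pattern, sketched with a hypothetical config dict:

```python
import pathlib

config = {"feature_generation": {"path_to_features": None}}  # hypothetical snippet

path_to_features = config.get("feature_generation", {}).get("path_to_features")
root = pathlib.Path.cwd() if path_to_features is None else pathlib.Path(path_to_features)

path_gen_features = (
    root / "generated_features" / "field_297" / "testFeatures_field_297_ccd_2_quad_2.parquet"
)
print(path_gen_features)
```
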
@@ -2010,12 +2561,12 @@ def test(self, doGPU=False):
with status("Test get_cone_ids"):
print()
_ = get_quad_ids.get_cone_ids(
- obj_id_list=['obj1', 'obj2', 'obj3'],
+ obj_id_list=["obj1", "obj2", "obj3"],
ra_list=[40.0, 41.0, 42.0],
dec_list=[50.0, 51.0, 52.0],
)
- src_catalog = self.config['kowalski']['collections']['sources']
+ src_catalog = self.config["kowalski"]["collections"]["sources"]
with status("Test get_ids_loop and get_field_ids"):
print()
_, lst = get_quad_ids.get_ids_loop(
@@ -2034,34 +2585,53 @@ def test(self, doGPU=False):
test_ftrs, outfile = get_features.get_features_loop(
get_features.get_features,
source_ids=lst[0],
- features_catalog=self.config['kowalski']['collections']['features'],
+ features_catalog=self.config["kowalski"]["collections"]["features"],
field=297,
limit_per_query=5,
max_sources=10,
save=False,
)
- testpath = pathlib.Path(outfile)
- testpath = testpath.parent.parent
+ if path_to_features is None:
+ testpath = pathlib.Path(outfile)
+ testpath = testpath.parent.parent
+ else:
+ testpath = pathlib.Path(path_to_features) / "features"
# Use 'field_0' as test directory to avoid removing any existing data locally
- testpath_features = testpath / 'field_0'
+ testpath_features = testpath / "field_0"
if not testpath_features.exists():
testpath_features.mkdir(parents=True, exist_ok=True)
- write_parquet(test_ftrs, str(testpath_features / 'field_0_iter_0.parquet'))
+ write_parquet(test_ftrs, str(testpath_features / "field_0_iter_0.parquet"))
# create a mock dataset and check that the training pipeline works
dataset_orig = f"{uuid.uuid4().hex}_orig.csv"
dataset = f"{uuid.uuid4().hex}.csv"
- path_mock = pathlib.Path(__file__).parent.absolute() / "data" / "training"
- group_mock = 'scope_test'
+ path_mock = self.base_path / "data" / "training"
+ group_mock = "scope_test"
try:
- with status('Test training'):
+ with status("Test training"):
print()
- period_suffix_config = self.config['features']['info']['period_suffix']
- period_suffix_2 = 'LS'
+ period_suffix_config = (
+ self.config.get("features").get("info").get("period_suffix")
+ )
+ if doGPU:
+ if period_suffix_config not in [
+ "ELS",
+ "ECE",
+ "EAOV",
+ "ELS_ECE_EAOV",
+ ]:
+ period_suffix_test = "ELS_ECE_EAOV"
+ else:
+ period_suffix_test = period_suffix_config
+ else:
+ if period_suffix_config not in ["LS", "CE", "AOV", "LS_CE_AOV"]:
+ period_suffix_test = "LS"
+ else:
+ period_suffix_test = period_suffix_config
if not path_mock.exists():
path_mock.mkdir(parents=True, exist_ok=True)
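
The conditional block above normalizes the configured period suffix so the GPU test uses the ELS/ECE/EAOV features and the CPU test uses LS/CE/AOV. The same logic, pulled into a small helper here purely for illustration (this function is not part of the codebase):

```python
def pick_period_suffix(period_suffix_config: str, doGPU: bool = False) -> str:
    """Return a period suffix compatible with the requested hardware path."""
    gpu_suffixes = ["ELS", "ECE", "EAOV", "ELS_ECE_EAOV"]
    cpu_suffixes = ["LS", "CE", "AOV", "LS_CE_AOV"]
    if doGPU:
        return period_suffix_config if period_suffix_config in gpu_suffixes else "ELS_ECE_EAOV"
    return period_suffix_config if period_suffix_config in cpu_suffixes else "LS"


print(pick_period_suffix("LS", doGPU=True))   # ELS_ECE_EAOV
print(pick_period_suffix("CE", doGPU=False))  # CE
```
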
@@ -2070,28 +2640,28 @@ def test(self, doGPU=False):
feature_names_orig = [
key
for key in all_feature_names
- if forgiving_true(all_feature_names[key]['include'])
+ if forgiving_true(all_feature_names[key]["include"])
]
feature_names_new = feature_names_orig.copy()
if not (
- (period_suffix_config is None) | (period_suffix_config == 'None')
+ (period_suffix_config is None) | (period_suffix_config == "None")
):
periodic_bool = [
- all_feature_names[x]['periodic'] for x in feature_names_new
+ all_feature_names[x]["periodic"] for x in feature_names_new
]
for j, name in enumerate(feature_names_new):
if periodic_bool[j]:
- feature_names_new[j] = f'{name}_{period_suffix_config}'
+ feature_names_new[j] = f"{name}_{period_suffix_config}"
feature_names = feature_names_orig.copy()
- if not ((period_suffix_2 is None) | (period_suffix_2 == 'None')):
+ if not ((period_suffix_test is None) | (period_suffix_test == "None")):
periodic_bool = [
- all_feature_names[x]['periodic'] for x in feature_names
+ all_feature_names[x]["periodic"] for x in feature_names
]
for j, name in enumerate(feature_names):
if periodic_bool[j]:
- feature_names[j] = f'{name}_{period_suffix_2}'
+ feature_names[j] = f"{name}_{period_suffix_test}"
class_names = [
self.config["training"]["classes"][class_name]["label"]
@@ -2136,16 +2706,16 @@ def test(self, doGPU=False):
df_mock = pd.DataFrame.from_records(entries)
df_mock.to_csv(path_mock / dataset, index=False)
- algorithms = ['xgb', 'dnn']
+ algorithms = ["xgb", "dnn"]
model_paths_orig = []
# Train twice: once on Kowalski features, once on generated features with different periodic feature names
for algorithm in algorithms:
tag = "vnv"
- if algorithm == 'xgb':
- extension = 'json'
- elif algorithm == 'dnn':
- extension = 'h5'
+ if algorithm == "xgb":
+ extension = "json"
+ elif algorithm == "dnn":
+ extension = "h5"
time_tag = self.train(
tag=tag,
path_dataset=path_mock / dataset_orig,
@@ -2159,7 +2729,7 @@ def test(self, doGPU=False):
group=group_mock,
)
path_model = (
- pathlib.Path(__file__).parent.absolute()
+ self.base_path
/ f"models_{algorithm}"
/ group_mock
/ tag
@@ -2170,10 +2740,10 @@ def test(self, doGPU=False):
model_paths = []
for algorithm in algorithms:
tag = "vnv"
- if algorithm == 'xgb':
- extension = 'json'
- elif algorithm == 'dnn':
- extension = 'h5'
+ if algorithm == "xgb":
+ extension = "json"
+ elif algorithm == "dnn":
+ extension = "h5"
time_tag = self.train(
tag=tag,
path_dataset=path_mock / dataset,
@@ -2184,11 +2754,11 @@ def test(self, doGPU=False):
test=True,
algorithm=algorithm,
skip_cv=True,
- period_suffix=period_suffix_2,
+ period_suffix=period_suffix_test,
group=group_mock,
)
path_model = (
- pathlib.Path(__file__).parent.absolute()
+ self.base_path
/ f"models_{algorithm}"
/ group_mock
/ tag
@@ -2196,8 +2766,8 @@ def test(self, doGPU=False):
)
model_paths += [path_model]
- print('model_paths_orig', model_paths_orig)
- print('model_paths', model_paths)
+ print("model_paths_orig", model_paths_orig)
+ print("model_paths", model_paths)
with status("Test inference (queried features)"):
print()
@@ -2230,7 +2800,7 @@ def test(self, doGPU=False):
trainingSet=df_mock,
feature_directory=test_feature_directory,
feature_file_prefix=test_feature_filename,
- period_suffix=period_suffix_2,
+ period_suffix=period_suffix_test,
no_write_metadata=True,
)
print()
@@ -2244,7 +2814,7 @@ def test(self, doGPU=False):
xgb_model=True,
feature_directory=test_feature_directory,
feature_file_prefix=test_feature_filename,
- period_suffix=period_suffix_2,
+ period_suffix=period_suffix_test,
no_write_metadata=True,
)
@@ -2270,7 +2840,7 @@ def test(self, doGPU=False):
[0],
probability_threshold=0.0,
doNotSave=True,
- algorithm='xgb',
+ algorithm="xgb",
)
_ = self.select_fritz_sample(
[0],
@@ -2279,7 +2849,7 @@ def test(self, doGPU=False):
min_class_examples=3,
probability_threshold=0.0,
doNotSave=True,
- algorithm='xgb',
+ algorithm="xgb",
)
finally:
@@ -2287,21 +2857,17 @@ def test(self, doGPU=False):
(path_mock / dataset_orig).unlink()
(path_mock / dataset).unlink()
os.remove(path_gen_features)
- (testpath_features / 'field_0_iter_0.parquet').unlink()
+ (testpath_features / "field_0_iter_0.parquet").unlink()
os.rmdir(testpath_features)
(preds_filename_dnn_orig).unlink()
(preds_filename_xgb_orig).unlink()
(preds_filename_dnn).unlink()
(preds_filename_xgb).unlink()
- (preds_filename_dnn_orig.parent / 'meta.json').unlink()
- (preds_filename_xgb_orig.parent / 'meta.json').unlink()
+ (preds_filename_dnn_orig.parent / "meta.json").unlink()
+ (preds_filename_xgb_orig.parent / "meta.json").unlink()
os.rmdir(preds_filename_dnn_orig.parent)
os.rmdir(preds_filename_xgb_orig.parent)
# Remove trained model artifacts, but keep models_xgb and models_dnn directories
for path in model_paths:
shutil.rmtree(path.parent.parent)
-
-
-if __name__ == "__main__":
- fire.Fire(Scope)
diff --git a/scope/utils.py b/scope/utils.py
index 4a4bd144..5d480ac5 100644
--- a/scope/utils.py
+++ b/scope/utils.py
@@ -39,11 +39,15 @@
import json as JSON
from sklearn.impute import KNNImputer
import seaborn as sns
+import argparse
+import os
+from deepdiff import DeepDiff
+from pprint import pprint
-BASE_DIR = pathlib.Path(__file__).parent.parent.absolute()
+BASE_DIR = pathlib.Path.cwd()
-def load_config(config_path: Union[str, pathlib.Path]):
+def load_config(config_path: Union[str, pathlib.Path] = "config.yaml"):
"""
Load config and secrets
"""
@@ -53,6 +57,66 @@ def load_config(config_path: Union[str, pathlib.Path]):
return config
+def parse_load_config():
+ """
+ Load config from user-specified --config-path argument
+ """
+ config_parser = argparse.ArgumentParser()
+ config_parser.add_argument(
+ "--config-path",
+ type=str,
+ help="path to config file",
+ )
+ config_parser.add_argument(
+ "--check-configs",
+ action="store_true",
+ help="if set, check config against default file in same directory",
+ )
+ config_parser.add_argument(
+ "--default-config-name",
+ type=str,
+ default="config.defaults.yaml",
+ help="name of default config file",
+ )
+
+ config_args, _ = config_parser.parse_known_args()
+ config_path = config_args.config_path
+
+ if config_path is None:
+ print(f"No --config-path specified. Loading '{BASE_DIR}/config.yaml'.")
+ config_path = str(BASE_DIR / "config.yaml")
+ else:
+ print(f"Loading config file from '{config_path}'.")
+
+ config = load_config(config_path)
+
+ if config_args.check_configs:
+ print("Checking configuration versus defaults...")
+ config_dirname = os.path.dirname(config_path)
+ default_config_path = os.path.join(
+ config_dirname, config_args.default_config_name
+ )
+
+ try:
+ default_config = load_config(default_config_path)
+ except Exception:
+ print(
+ f"Could not load {default_config_path}. To compare configs, place the latest version of config.defaults.yaml in the same directory as your customized config file ({config_path})."
+ )
+ # Re-raise: the comparison below cannot run without the defaults file
+ raise
+
+ deep_diff = DeepDiff(default_config, config, ignore_order=True)
+ difference = {
+ k: v for k, v in deep_diff.items() if k in ("dictionary_item_removed",)
+ }
+ if len(difference) > 0:
+ print("config structure differs from defaults")
+ pprint(difference)
+ raise KeyError("Fix config before proceeding")
+ print("Configuration check finished.")
+
+ return config
+
+
def time_stamp():
"""
@@ -90,7 +154,7 @@ def make_tdtax_taxonomy(taxonomy: Mapping):
def write_hdf(
- dataframe: pd.DataFrame, filepath: str, key: str = 'df', overwrite: bool = True
+ dataframe: pd.DataFrame, filepath: str, key: str = "df", overwrite: bool = True
):
"""
Write HDF5 file and attach metadata
@@ -100,14 +164,14 @@ def write_hdf(
:param key: key associated with DataFrame (str)
:param overwrite: if True, overwrite file, else append. (bool)
"""
- mode = 'w' if overwrite else 'a'
+ mode = "w" if overwrite else "a"
with pd.HDFStore(filepath, mode=mode) as store:
store.put(key, dataframe)
store.get_storer(key).attrs.metadata = dataframe.attrs
-def read_hdf(filepath: str, key: str = 'df'):
+def read_hdf(filepath: str, key: str = "df"):
"""
Read HDF5 file and metadata (if available). Currently supports accessing one key of the file at a time.
@@ -116,17 +180,17 @@ def read_hdf(filepath: str, key: str = 'df'):
:return: pandas.DataFrame
"""
- with pd.HDFStore(filepath, mode='r') as store:
+ with pd.HDFStore(filepath, mode="r") as store:
dataframe = store[key]
try:
dataframe.attrs = store.get_storer(key).attrs.metadata
except AttributeError:
- warnings.warn('Did not read metadata from HDF5 file.')
+ warnings.warn("Did not read metadata from HDF5 file.")
return dataframe
-def write_parquet(dataframe: pd.DataFrame, filepath: str, meta_key: str = 'scope'):
+def write_parquet(dataframe: pd.DataFrame, filepath: str, meta_key: str = "scope"):
"""
Write Apache Parquet file and attach Metadata
@@ -154,7 +218,7 @@ def write_parquet(dataframe: pd.DataFrame, filepath: str, meta_key: str = 'scope
pq.write_table(table, filepath)
-def read_parquet(filepath: str, meta_key: str = 'scope'):
+def read_parquet(filepath: str, meta_key: str = "scope"):
"""
Read Apache Parquet file and metadata (if available)
@@ -173,7 +237,7 @@ def read_parquet(filepath: str, meta_key: str = 'scope'):
restored_meta = JSON.loads(meta_json)
dataframe.attrs = restored_meta
except KeyError:
- warnings.warn('Did not read metadata from parquet file.')
+ warnings.warn("Did not read metadata from parquet file.")
return dataframe
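
`write_parquet` and `read_parquet` persist `DataFrame.attrs` as custom Parquet schema metadata under a dedicated key. A simplified pyarrow sketch of that round trip (error handling omitted; the `scope` key mirrors the default `meta_key`):

```python
import json
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

df = pd.DataFrame({"ra": [40.0], "dec": [50.0]})
df.attrs = {"scope_code_version": "example"}

# Merge the attrs dict into the schema metadata alongside pandas' own entry
table = pa.Table.from_pandas(df)
merged_meta = {**(table.schema.metadata or {}), b"scope": json.dumps(df.attrs).encode()}
pq.write_table(table.replace_schema_metadata(merged_meta), "example.parquet")

# Round trip: restore attrs from the schema metadata
restored = pq.read_table("example.parquet")
df2 = restored.to_pandas()
df2.attrs = json.loads(restored.schema.metadata[b"scope"])
print(df2.attrs)
```
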
@@ -268,8 +332,8 @@ def plot_periods(
"""Plot a histogram of periods for the sample"""
# Determine the period column name, accounting for any period suffix
- period_colname = 'period'
- if not ((period_suffix is None) | (period_suffix == 'None')):
+ period_colname = "period"
+ if not ((period_suffix is None) | (period_suffix == "None")):
period_colname = f"{period_colname}_{period_suffix}"
plt.rc("text", usetex=True)
@@ -518,51 +582,51 @@ def impute_features(
features_df: pd.DataFrame,
n_neighbors: int = 5,
self_impute: bool = False,
- **kwargs,
+ period_suffix: str = None,
):
# Load config file
config = load_config(BASE_DIR / "config.yaml")
- period_suffix_config = config['features']['info']['period_suffix']
- period_suffix = kwargs.get('period_suffix', period_suffix_config)
+ if period_suffix is None:
+ period_suffix = config["features"]["info"]["period_suffix"]
if self_impute:
referenceSet = features_df.copy()
else:
# Load training set
- trainingSetPath = str(BASE_DIR / config['training']['dataset'])
- if trainingSetPath.endswith('.parquet'):
+ trainingSetPath = str(BASE_DIR / config["training"]["dataset"])
+ if trainingSetPath.endswith(".parquet"):
trainingSet = read_parquet(trainingSetPath)
- elif trainingSetPath.endswith('.h5'):
+ elif trainingSetPath.endswith(".h5"):
trainingSet = read_hdf(trainingSetPath)
- elif trainingSetPath.endswith('.csv'):
+ elif trainingSetPath.endswith(".csv"):
trainingSet = pd.read_csv(trainingSetPath)
else:
raise ValueError(
- 'Training set must have one of .parquet, .h5 or .csv file formats.'
+ "Training set must have one of .parquet, .h5 or .csv file formats."
)
referenceSet = trainingSet
- all_features = config['features']['ontological']
+ all_features = config["features"]["ontological"]
# Impute zero where specified
feature_list_impute_zero = [
x
for x in all_features
if (
- all_features[x]['include']
- and all_features[x]['impute_strategy'] in ['zero', 'Zero', 'ZERO']
+ all_features[x]["include"]
+ and all_features[x]["impute_strategy"] in ["zero", "Zero", "ZERO"]
)
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
- periodic_bool = [all_features[x]['periodic'] for x in feature_list_impute_zero]
+ if not ((period_suffix is None) | (period_suffix == "None")):
+ periodic_bool = [all_features[x]["periodic"] for x in feature_list_impute_zero]
for j, name in enumerate(feature_list_impute_zero):
if periodic_bool[j]:
- feature_list_impute_zero[j] = f'{name}_{period_suffix}'
+ feature_list_impute_zero[j] = f"{name}_{period_suffix}"
- print('Imputing zero for the following features: ', feature_list_impute_zero)
+ print("Imputing zero for the following features: ", feature_list_impute_zero)
print()
for feat in feature_list_impute_zero:
features_df[feat] = features_df[feat].fillna(0.0)
@@ -572,20 +636,20 @@ def impute_features(
x
for x in all_features
if (
- all_features[x]['include']
- and all_features[x]['impute_strategy'] in ['median', 'Median', 'MEDIAN']
+ all_features[x]["include"]
+ and all_features[x]["impute_strategy"] in ["median", "Median", "MEDIAN"]
)
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
+ if not ((period_suffix is None) | (period_suffix == "None")):
periodic_bool = [
- all_features[x]['periodic'] for x in feature_list_impute_median
+ all_features[x]["periodic"] for x in feature_list_impute_median
]
for j, name in enumerate(feature_list_impute_median):
if periodic_bool[j]:
- feature_list_impute_median[j] = f'{name}_{period_suffix}'
+ feature_list_impute_median[j] = f"{name}_{period_suffix}"
- print('Imputing median for the following features: ', feature_list_impute_median)
+ print("Imputing median for the following features: ", feature_list_impute_median)
print()
for feat in feature_list_impute_median:
features_df[feat] = features_df[feat].fillna(np.nanmedian(referenceSet[feat]))
@@ -595,18 +659,18 @@ def impute_features(
x
for x in all_features
if (
- all_features[x]['include']
- and all_features[x]['impute_strategy'] in ['mean', 'Mean', 'MEAN']
+ all_features[x]["include"]
+ and all_features[x]["impute_strategy"] in ["mean", "Mean", "MEAN"]
)
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
- periodic_bool = [all_features[x]['periodic'] for x in feature_list_impute_mean]
+ if not ((period_suffix is None) | (period_suffix == "None")):
+ periodic_bool = [all_features[x]["periodic"] for x in feature_list_impute_mean]
for j, name in enumerate(feature_list_impute_mean):
if periodic_bool[j]:
- feature_list_impute_mean[j] = f'{name}_{period_suffix}'
+ feature_list_impute_mean[j] = f"{name}_{period_suffix}"
- print('Imputing mean for the following features: ', feature_list_impute_mean)
+ print("Imputing mean for the following features: ", feature_list_impute_mean)
print()
for feat in feature_list_impute_mean:
features_df[feat] = features_df[feat].fillna(np.nanmean(referenceSet[feat]))
@@ -616,23 +680,23 @@ def impute_features(
x
for x in all_features
if (
- all_features[x]['include']
- and all_features[x]['impute_strategy'] in ['regress', 'Regress', 'REGRESS']
+ all_features[x]["include"]
+ and all_features[x]["impute_strategy"] in ["regress", "Regress", "REGRESS"]
)
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
- periodic_bool = [all_features[x]['periodic'] for x in feature_list_regression]
+ if not ((period_suffix is None) | (period_suffix == "None")):
+ periodic_bool = [all_features[x]["periodic"] for x in feature_list_regression]
for j, name in enumerate(feature_list_regression):
if periodic_bool[j]:
- feature_list_regression[j] = f'{name}_{period_suffix}'
+ feature_list_regression[j] = f"{name}_{period_suffix}"
- print('Imputing by regression on the following features: ', feature_list_regression)
+ print("Imputing by regression on the following features: ", feature_list_regression)
print()
# Fit KNNImputer to training set
imp = KNNImputer(n_neighbors=n_neighbors)
- imp.set_output(transform='pandas')
+ imp.set_output(transform="pandas")
fit_feats = imp.fit(referenceSet[feature_list_regression])
imputed_feats = fit_feats.transform(features_df[feature_list_regression])
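
The regression strategy above fits a `KNNImputer` on the reference (training) set and applies it to the new features; `set_output(transform="pandas")` keeps column names attached (available in scikit-learn >= 1.2). A compact sketch with toy data:

```python
import numpy as np
import pandas as pd
from sklearn.impute import KNNImputer

reference = pd.DataFrame({"period": [0.5, 1.2, 2.0, 3.1], "amplitude": [0.1, 0.3, 0.2, 0.4]})
new_feats = pd.DataFrame({"period": [1.0, np.nan], "amplitude": [np.nan, 0.25]})

imp = KNNImputer(n_neighbors=2)
imp.set_output(transform="pandas")  # return DataFrames instead of bare numpy arrays

# Fit on the reference set, then fill the gaps in the new features
imputed = imp.fit(reference).transform(new_feats)
print(imputed)
```
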
@@ -646,16 +710,16 @@ def impute_features(
x
for x in all_features
if (
- all_features[x]['include']
- and all_features[x]['impute_strategy'] in ['none', 'None', 'NONE']
+ all_features[x]["include"]
+ and all_features[x]["impute_strategy"] in ["none", "None", "NONE"]
)
]
- if not ((period_suffix is None) | (period_suffix == 'None')):
- periodic_bool = [all_features[x]['periodic'] for x in feature_list_impute_none]
+ if not ((period_suffix is None) | (period_suffix == "None")):
+ periodic_bool = [all_features[x]["periodic"] for x in feature_list_impute_none]
for j, name in enumerate(feature_list_impute_none):
if periodic_bool[j]:
- feature_list_impute_none[j] = f'{name}_{period_suffix}'
+ feature_list_impute_none[j] = f"{name}_{period_suffix}"
orig_len = len(features_df)
features_df = features_df.dropna(subset=feature_list_impute_none).reset_index(
@@ -664,7 +728,7 @@ def impute_features(
new_len = len(features_df)
print()
print(
- f'Dropped {orig_len - new_len} rows containing missing features with no imputation strategy.'
+ f"Dropped {orig_len - new_len} rows containing missing features with no imputation strategy."
)
return features_df
@@ -693,12 +757,12 @@ def overlapping_histogram(a, bins):
sa = np.sort(a[i : i + block])
n += (
np.r_[
- sa.searchsorted(bins[:-1, 1], 'left'),
- sa.searchsorted(bins[-1, 1], 'right'),
+ sa.searchsorted(bins[:-1, 1], "left"),
+ sa.searchsorted(bins[-1, 1], "right"),
]
- np.r_[
- sa.searchsorted(bins[:-1, 0], 'left'),
- sa.searchsorted(bins[-1, 0], 'right'),
+ sa.searchsorted(bins[:-1, 0], "left"),
+ sa.searchsorted(bins[-1, 0], "right"),
]
)
return n, (bins[:, 0] + bins[:, 1]) / 2.0
@@ -784,7 +848,7 @@ def sort_lightcurve(t, m, e):
def make_confusion_matrix(
cf,
group_names=None,
- categories='auto',
+ categories="auto",
count=True,
percent=True,
cbar=True,
@@ -792,11 +856,11 @@ def make_confusion_matrix(
xyplotlabels=True,
sum_stats=True,
figsize=None,
- cmap='Blues',
+ cmap="Blues",
title=None,
annotate_scores=False,
):
- '''
+ """
CONFUSION MATRIX CODE ADAPTED FROM https://github.com/DTrimarchi10/confusion_matrix (Dennis Trimarchi)
This function makes a pretty plot of an sklearn confusion matrix `cf` using a Seaborn heatmap visualization.
@@ -829,10 +893,10 @@ def make_confusion_matrix(
title: Title for the heatmap. Default is None.
- '''
+ """
# CODE TO GENERATE TEXT INSIDE EACH SQUARE
- blanks = ['' for i in range(cf.size)]
+ blanks = ["" for i in range(cf.size)]
if group_names and len(group_names) == cf.size:
group_labels = ["{}\n".format(value) for value in group_names]
@@ -877,7 +941,7 @@ def make_confusion_matrix(
# SET FIGURE PARAMETERS ACCORDING TO OTHER ARGUMENTS
if figsize is None:
# Get default figure size if not set
- figsize = plt.rcParams.get('figure.figsize')
+ figsize = plt.rcParams.get("figure.figsize")
if xyticks is False:
# Do not show categories if xyticks is False
@@ -896,8 +960,8 @@ def make_confusion_matrix(
)
if xyplotlabels:
- plt.ylabel('True label')
- plt.xlabel('Predicted label' + stats_text)
+ plt.ylabel("True label")
+ plt.xlabel("Predicted label" + stats_text)
else:
plt.xlabel(stats_text)
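
For reference, `make_confusion_matrix` takes a raw scikit-learn confusion matrix plus display options; a hedged usage sketch (class names are illustrative, and only keyword arguments visible in the definition above are used):

```python
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

from scope.utils import make_confusion_matrix  # defined in this module

y_true = np.array([0, 0, 1, 1, 1, 0])
y_pred = np.array([0, 1, 1, 1, 0, 0])
cf = confusion_matrix(y_true, y_pred)

make_confusion_matrix(
    cf,
    categories=["non-variable", "variable"],  # illustrative labels
    count=True,
    percent=True,
    cmap="Blues",
    title="vnv classifier",
)
plt.show()
```
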
@@ -909,22 +973,22 @@ def make_confusion_matrix(
def plot_roc(fpr, tpr, roc_auc):
plt.plot(fpr, tpr)
- plt.plot([0, 1], [0, 1], 'k--')
+ plt.plot([0, 1], [0, 1], "k--")
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
- plt.xlabel('False Positive Rate')
- plt.ylabel('True Positive Rate')
- plt.title('ROC curve (area = %0.6f)' % roc_auc)
+ plt.xlabel("False Positive Rate")
+ plt.ylabel("True Positive Rate")
+ plt.title("ROC curve (area = %0.6f)" % roc_auc)
def plot_pr(recall, precision):
- plt.step(recall, precision, color='b', alpha=0.2, where='post')
- plt.fill_between(recall, precision, step='post', alpha=0.2, color='b')
- plt.xlabel('Recall')
- plt.ylabel('Precision')
+ plt.step(recall, precision, color="b", alpha=0.2, where="post")
+ plt.fill_between(recall, precision, step="post", alpha=0.2, color="b")
+ plt.xlabel("Recall")
+ plt.ylabel("Precision")
plt.ylim([0.0, 1.05])
plt.xlim([0.0, 1.0])
- plt.title('Precision-Recall')
+ plt.title("Precision-Recall")
""" Datasets """
@@ -937,15 +1001,17 @@ def __init__(
path_dataset: Union[str, pathlib.Path],
features: tuple,
verbose: bool = False,
- algorithm: str = 'dnn',
- **kwargs,
+ algorithm: str = "dnn",
+ period_suffix: str = None,
):
"""Load parquet, hdf5 or csv file with the dataset containing both data and labels
- :param tag:
- :param path_dataset:
- :param features:
- :param verbose:
+ :param tag: classifier designation, refers to "class" in config.taxonomy (str)
+ :param path_dataset: local path to .parquet, .h5 or .csv file with the dataset (str)
+ :param features: tuple of input feature names (tuple)
+ :param verbose: if set, print additional outputs (bool)
+ :param algorithm: name of ML algorithm to use (str)
+ :param period_suffix: suffix of period/Fourier features to use for training (str)
"""
self.tag = tag
self.path_dataset = str(path_dataset)
@@ -955,37 +1021,37 @@ def __init__(
# Load config file
self.config = load_config(BASE_DIR / "config.yaml")
- self.period_suffix_config = self.config['features']['info']['period_suffix']
+ self.period_suffix_config = self.config["features"]["info"]["period_suffix"]
- period_suffix = kwargs.get('period_suffix', self.period_suffix_config)
+ if period_suffix is None:
+ period_suffix = self.period_suffix_config
- if algorithm in ['DNN', 'NN', 'dnn', 'nn']:
- self.algorithm = 'dnn'
- elif algorithm in ['XGB', 'xgb', 'XGBoost', 'xgboost', 'XGBOOST']:
- self.algorithm = 'xgb'
+ if algorithm in ["DNN", "NN", "dnn", "nn"]:
+ self.algorithm = "dnn"
+ elif algorithm in ["XGB", "xgb", "XGBoost", "xgboost", "XGBOOST"]:
+ self.algorithm = "xgb"
else:
- raise ValueError('Current supported algorithms are DNN and XGB.')
+ raise ValueError("Current supported algorithms are DNN and XGB.")
if self.verbose:
log(f"Loading {self.path_dataset}...")
- nrows = kwargs.get("nrows", None)
csv = False
- if self.path_dataset.endswith('.csv'):
+ if self.path_dataset.endswith(".csv"):
csv = True
- self.df_ds = pd.read_csv(self.path_dataset, nrows=nrows)
- elif self.path_dataset.endswith('.h5'):
+ self.df_ds = pd.read_csv(self.path_dataset)
+ elif self.path_dataset.endswith(".h5"):
self.df_ds = read_hdf(self.path_dataset)
- for key in ['coordinates', 'dmdt']:
+ for key in ["coordinates", "dmdt"]:
df_temp = read_hdf(self.path_dataset, key=key)
self.df_ds[key] = df_temp
del df_temp
- self.dmdt = self.df_ds['dmdt']
- elif self.path_dataset.endswith('.parquet'):
+ self.dmdt = self.df_ds["dmdt"]
+ elif self.path_dataset.endswith(".parquet"):
self.df_ds = read_parquet(self.path_dataset)
- self.dmdt = self.df_ds['dmdt']
+ self.dmdt = self.df_ds["dmdt"]
else:
- raise ValueError('Dataset must have .parquet, .h5 or .csv extension.')
+ raise ValueError("Dataset must have .parquet, .h5 or .csv extension.")
if self.verbose:
log(self.df_ds[list(features)].describe())
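
With `period_suffix` now an explicit keyword argument, the dataset loader can be constructed without `**kwargs`. A hedged instantiation sketch (the class name `Dataset` and all paths/feature names are assumptions for illustration; the loader also expects a `config.yaml` in the working directory):

```python
from scope.utils import Dataset  # class name assumed from the "Datasets" section above

ds = Dataset(
    tag="vnv",                                   # classifier designation
    path_dataset="data/training/train.parquet",  # placeholder path
    features=("period", "amplitude"),            # placeholder feature names
    verbose=True,
    algorithm="xgb",
    period_suffix="LS",
)
```
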
@@ -997,7 +1063,7 @@ def __init__(
)
dmdt = []
- if (self.verbose) & (self.algorithm == 'dnn'):
+ if (self.verbose) & (self.algorithm == "dnn"):
print("Moving dmdt's to a dedicated numpy array...")
iterator = tqdm(self.df_ds.itertuples(), total=len(self.df_ds))
else:
@@ -1039,28 +1105,28 @@ def make(
batch_size: int = 256,
shuffle_buffer_size: int = 256,
epochs: int = 300,
- **kwargs,
+ float_convert_types: list = [64, 32],
):
"""Make datasets for target_label
- :param target_label: corresponds to training.classes.