diff --git a/CHANGES.rst b/CHANGES.rst index 758b18a..e50f6ec 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,10 @@ 1.1.2 (unreleased) ================== +docs +---- +- Use furo as sphinx theme, improve page hierarchy and add custom domain [#104] + preprocessor ------------ - explicitly pass `encoding=bytes` in transform.hypersonic_pliers for numpy 2 compatibility where this will no longer be the default for np.loadtxt [#92] diff --git a/docs/Makefile b/docs/Makefile index e4c8763..8bdd209 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -100,4 +100,7 @@ linkcheck: doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ - "results in $(BUILDDIR)/doctest/output.txt." \ No newline at end of file + "results in $(BUILDDIR)/doctest/output.txt." + +livehtml: + sphinx-autobuild "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) \ No newline at end of file diff --git a/docs/requirements.txt b/docs/requirements.txt index e721394..3e92eb9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,19 +1,19 @@ setuptools==70.2.0 -Sphinx==8.0.2 -sphinx-automodapi==0.17.0 -sphinx_rtd_theme==2.0.0 +Sphinx==8.1.3 +sphinx-automodapi==0.18.0 +furo==2024.8.6 matplotlib==3.8.4 tensorflow==2.17.0 numpy==2.1.1 numpydoc==1.7.0 -astropy==6.1.1 -pydot==2.0.0 +astropy==6.1.4 +pydot==3.0.2 graphviz==0.20.3 astroquery==0.4.7 -boto3==1.28.7 -plotly==5.20.0 +boto3==1.35.54 +plotly==5.24.1 progressbar==2.5 scikit-learn==1.5.2 -scikeras==0.11.0 -tables==3.9.2 -tqdm==4.66.5 +scikeras==0.13.0 +tables==3.10.1 +tqdm==4.66.6 diff --git a/docs/source/analyzer/compute.rst b/docs/source/analyzer/compute.rst index 588f75b..e342903 100644 --- a/docs/source/analyzer/compute.rst +++ b/docs/source/analyzer/compute.rst @@ -3,7 +3,7 @@ ************************* spacekit.analyzer.compute ************************* - + .. currentmodule:: spacekit.analyzer.compute .. 
inheritance-diagram:: spacekit.analyzer.compute diff --git a/docs/source/analyzer/index.rst b/docs/source/analyzer/index.rst new file mode 100644 index 0000000..5b3d010 --- /dev/null +++ b/docs/source/analyzer/index.rst @@ -0,0 +1,15 @@ +.. _analyzer: + +***************** +spacekit.analyzer +***************** + +.. currentmodule:: spacekit.analyzer + +.. toctree:: + :maxdepth: 1 + + compute + explore + scan + track \ No newline at end of file diff --git a/docs/source/analyzer/track.rst b/docs/source/analyzer/track.rst index a8ff783..b045d1f 100644 --- a/docs/source/analyzer/track.rst +++ b/docs/source/analyzer/track.rst @@ -4,9 +4,12 @@ spacekit.analyzer.track *********************** +.. currentmodule:: spacekit.analyzer.track + + This module tracks the start, end clocktime intervals for a running process and record the total duration. Optionally the timestamps and durations for multiple processes can be logged in a single text file on disk (or separate files if desired). -.. currentmodule:: spacekit.analyzer.track + .. autofunction:: proc_time diff --git a/docs/source/builder/architect.rst b/docs/source/builder/architect.rst index e423f04..ef347aa 100644 --- a/docs/source/builder/architect.rst +++ b/docs/source/builder/architect.rst @@ -4,6 +4,7 @@ spacekit.builder.architect ************************** + .. currentmodule:: spacekit.builder.architect .. inheritance-diagram:: spacekit.builder.architect diff --git a/docs/source/builder/blueprints.rst b/docs/source/builder/blueprints.rst index 5b427b0..ffbfff7 100644 --- a/docs/source/builder/blueprints.rst +++ b/docs/source/builder/blueprints.rst @@ -1,4 +1,4 @@ -.. _blueprint: +.. _blueprints: *************************** spacekit.builder.blueprints @@ -6,6 +6,7 @@ spacekit.builder.blueprints .. currentmodule:: spacekit.builder.blueprints + .. 
autoclass:: Blueprint :members: :undoc-members: diff --git a/docs/source/builder/index.rst b/docs/source/builder/index.rst new file mode 100644 index 0000000..b701ea1 --- /dev/null +++ b/docs/source/builder/index.rst @@ -0,0 +1,13 @@ +.. _builder: + +***************** +spacekit.builder +***************** + +.. currentmodule:: spacekit.builder + +.. toctree:: + :maxdepth: 1 + + architect + blueprints diff --git a/docs/source/conf.py b/docs/source/conf.py index aa92d4c..84e98cc 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -16,8 +16,7 @@ import importlib from packaging.version import Version from configparser import ConfigParser -# import sphinx -# import stsci_rtd_theme + def setup(app): @@ -179,7 +178,7 @@ def setup(app): # a list of builtin themes. # html_theme = 'stsci_rtd_theme' # html_theme_path = [stsci_rtd_theme.get_html_theme_path()] -html_theme = "sphinx_rtd_theme" +html_theme = "furo" # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". @@ -208,7 +207,7 @@ def setup(app): # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -html_sidebars = {"**": ["globaltoc.html", "relations.html", "searchbox.html"]} +# html_sidebars = {"**": ["globaltoc.html", "relations.html", "searchbox.html"]} # Additional templates that should be rendered to pages, maps page names to # template names. @@ -220,6 +219,8 @@ def setup(app): # If false, no index is generated. html_use_index = True +html_baseurl = "https://docs.spacekit.org" + # If true, the index is split into individual pages for each letter. # html_split_index = False diff --git a/docs/source/dashboard/cal.rst b/docs/source/dashboard/cal.rst index 07fc531..cd36c0b 100644 --- a/docs/source/dashboard/cal.rst +++ b/docs/source/dashboard/cal.rst @@ -1,3 +1,6 @@ ********************** spacekit.dashboard.cal ********************** + + +.. 
currentmodule:: spacekit.dashboard.cal diff --git a/docs/source/dashboard/index.rst b/docs/source/dashboard/index.rst new file mode 100644 index 0000000..c8078e7 --- /dev/null +++ b/docs/source/dashboard/index.rst @@ -0,0 +1,13 @@ +.. _dashboard: + +****************** +spacekit.dashboard +****************** + +.. currentmodule:: spacekit.dashboard + +.. toctree:: + :maxdepth: 1 + + cal + svm diff --git a/docs/source/dashboard/svm.rst b/docs/source/dashboard/svm.rst index 9192861..fc7a4e8 100644 --- a/docs/source/dashboard/svm.rst +++ b/docs/source/dashboard/svm.rst @@ -1,3 +1,5 @@ ********************** spacekit.dashboard.svm -********************** \ No newline at end of file +********************** + +.. currentmodule:: spacekit.dashboard.svm diff --git a/docs/source/datasets/index.rst b/docs/source/datasets/index.rst new file mode 100644 index 0000000..ab9c0ca --- /dev/null +++ b/docs/source/datasets/index.rst @@ -0,0 +1,12 @@ +.. _datasets: + +***************** +spacekit.datasets +***************** + +.. currentmodule:: spacekit.datasets + +.. toctree:: + :maxdepth: 1 + + beam diff --git a/docs/source/extractor/index.rst b/docs/source/extractor/index.rst new file mode 100644 index 0000000..2c1af88 --- /dev/null +++ b/docs/source/extractor/index.rst @@ -0,0 +1,14 @@ +.. _extractor: + +****************** +spacekit.extractor +****************** + +.. currentmodule:: spacekit.extractor + +.. toctree:: + :maxdepth: 1 + + load + radio + scrape \ No newline at end of file diff --git a/docs/source/extractor/radio.rst b/docs/source/extractor/radio.rst index 6ed0a45..5499274 100644 --- a/docs/source/extractor/radio.rst +++ b/docs/source/extractor/radio.rst @@ -6,6 +6,7 @@ spacekit.extractor.radio .. currentmodule:: spacekit.extractor.radio + Querying and downloading .fits files from a MAST s3 bucket on AWS. 
Unlike `spacekit.extractor.scrape `_, which can access data in private s3 buckets, this module is specifically for collecting data from the publicly available MAST website and/or MAST data hosted on s3. Instead of scraping a closed collection, you're receiving data from an open channel - like a radio. .. autoclass:: Radio diff --git a/docs/source/generator/augment.rst b/docs/source/generator/augment.rst index 160dccc..0f9ef26 100644 --- a/docs/source/generator/augment.rst +++ b/docs/source/generator/augment.rst @@ -4,5 +4,7 @@ spacekit.generator.augment ************************** +.. currentmodule:: spacekit.generator.augment + .. automodule:: spacekit.generator.augment :members: \ No newline at end of file diff --git a/docs/source/generator/index.rst b/docs/source/generator/index.rst new file mode 100644 index 0000000..8bb72b7 --- /dev/null +++ b/docs/source/generator/index.rst @@ -0,0 +1,13 @@ +.. _generator: + +****************** +spacekit.generator +****************** + +.. currentmodule:: spacekit.generator + +.. toctree:: + :maxdepth: 1 + + augment + draw diff --git a/docs/source/index.rst b/docs/source/index.rst index 731cf3f..6dc9b2b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,108 +5,19 @@ spacekit documentation This is the documentation for ``spacekit``, the Astronomical Data Science and Machine Learning Toolkit -Reference/API -============= - - -Analyzer --------- - -.. toctree:: - :maxdepth: 1 - - compute - explore - scan - track - - -Builder -------- - -.. toctree:: - :maxdepth: 1 - - architect - blueprints - - -Dashboard ---------- - -.. toctree:: - :maxdepth: 1 - - cal - svm - - -Datasets ---------- - -.. toctree:: - :maxdepth: 1 - - beam - - -Extractor ---------- - -.. toctree:: - :maxdepth: 1 - - load - radio - scrape - - -Generator ---------- - -.. toctree:: - :maxdepth: 1 - - augment - draw - - -Logger ------- - .. toctree:: :maxdepth: 1 - log - - -Preprocessor ------------- - -.. 
toctree:: - :maxdepth: 1 - - encode - ingest - prep - scrub - transform - - -Skøpes: Space Telescope Machine Learning Applications ------------------------------------------------------ - -.. toctree:: - :maxdepth: 1 - - James Webb Space Telescope - Calibration Pipeline Resource Prediction Modeling - - Hubble Space Telescope - Single Visit Mosaic Alignment Modeling - Calcloud Data Pipeline Modeling + Analyzer + Builder + Dashboard + Datasets + Extractor + Generator + Logger + Preprocessor + Skøpes - K2/Kepler Exoplanets Indices and tables @@ -114,4 +25,3 @@ Indices and tables * :ref:`genindex` * :ref:`modindex` -* :ref:`search` \ No newline at end of file diff --git a/docs/source/logger/index.rst b/docs/source/logger/index.rst new file mode 100644 index 0000000..8af58e1 --- /dev/null +++ b/docs/source/logger/index.rst @@ -0,0 +1,12 @@ +.. _logger: + +*************** +spacekit.logger +*************** + +.. currentmodule:: spacekit.logger + +.. toctree:: + :maxdepth: 1 + + log diff --git a/docs/source/logger/log.rst b/docs/source/logger/log.rst index fa5535a..8b41551 100644 --- a/docs/source/logger/log.rst +++ b/docs/source/logger/log.rst @@ -4,5 +4,10 @@ spacekit.logger.log ******************* +.. currentmodule:: spacekit.logger.log + +.. toctree:: + :maxdepth: 1 + .. automodule:: spacekit.logger.log :members: \ No newline at end of file diff --git a/docs/source/preprocessor/encode.rst b/docs/source/preprocessor/encode.rst index a6d7461..204fde8 100644 --- a/docs/source/preprocessor/encode.rst +++ b/docs/source/preprocessor/encode.rst @@ -4,5 +4,7 @@ spacekit.preprocessor.encode **************************** +.. currentmodule:: spacekit.preprocessor.encode + .. automodule:: spacekit.preprocessor.encode :members: \ No newline at end of file diff --git a/docs/source/preprocessor/index.rst b/docs/source/preprocessor/index.rst new file mode 100644 index 0000000..b69bee0 --- /dev/null +++ b/docs/source/preprocessor/index.rst @@ -0,0 +1,16 @@ +.. 
_preprocessor: + +********************* +spacekit.preprocessor +********************* + +.. currentmodule:: spacekit.preprocessor + +.. toctree:: + :maxdepth: 1 + + encode + ingest + prep + scrub + transform diff --git a/docs/source/preprocessor/ingest.rst b/docs/source/preprocessor/ingest.rst index 8b98963..fdc9224 100644 --- a/docs/source/preprocessor/ingest.rst +++ b/docs/source/preprocessor/ingest.rst @@ -4,5 +4,7 @@ spacekit.preprocessor.ingest **************************** +.. currentmodule:: spacekit.preprocessor.ingest + .. automodule:: spacekit.preprocessor.ingest :members: \ No newline at end of file diff --git a/docs/source/preprocessor/prep.rst b/docs/source/preprocessor/prep.rst index 39c08c9..b7a3ead 100644 --- a/docs/source/preprocessor/prep.rst +++ b/docs/source/preprocessor/prep.rst @@ -4,5 +4,7 @@ spacekit.preprocessor.prep **************************** +.. currentmodule:: spacekit.preprocessor.prep + .. automodule:: spacekit.preprocessor.prep :members: diff --git a/docs/source/preprocessor/transform.rst b/docs/source/preprocessor/transform.rst index 4e6b54d..3703d15 100644 --- a/docs/source/preprocessor/transform.rst +++ b/docs/source/preprocessor/transform.rst @@ -6,6 +6,9 @@ spacekit.preprocessor.transform .. currentmodule:: spacekit.preprocessor.transform +.. toctree:: + :maxdepth: 1 + .. inheritance-diagram:: spacekit.preprocessor.transform :parts: 3 diff --git a/docs/source/skopes/hst.rst b/docs/source/skopes/hst.rst deleted file mode 100644 index bdb06be..0000000 --- a/docs/source/skopes/hst.rst +++ /dev/null @@ -1,10 +0,0 @@ -************************************************ -spacekit.skopes.hst - hubble space telescope api -************************************************ - - -.. 
toctree:: - :maxdepth: 1 - - Single Visit Mosaic Alignment Modeling - Calcloud Data Pipeline Modeling diff --git a/docs/source/skopes/hst/cal.rst b/docs/source/skopes/hst/cal.rst deleted file mode 100644 index 47d5427..0000000 --- a/docs/source/skopes/hst/cal.rst +++ /dev/null @@ -1,86 +0,0 @@ -****************************************** -spacekit - calcloud data pipeline modeling -****************************************** - -Calcloud Job Predict Data Dictionary ------------------------------------- - - -.. toctree:: - :maxdepth: 2 - - predict - train - - -DETECTOR --------- - -**ACS** - -* WFC (versus HRC or SBC) mosaic of two 2048 x 4096 pixels -* HRC and SBC are on 1024 x 1024 - -**WFC3** - -* UVIS is a mosaic of two 4096 x 2051 pixels -* IR is 1024 x 1024 -* could also use "NAXIS1" and "NAXIS2" which should reflect the size of the images. - -- `detector = 1` : WFC, UVIS -- `detector = 0` : IR, HRC, SBC, et al - - -DRIZCORR --------- - -This will run the drizzling step which can take -a bit of time. Only applies to ACS and WFC3, so values for other instruments will show as NaN and be converted to zero (false). - -- `drizcorr = 1` : PERFORM -- `drizcorr = 0` : OMIT, NaN - -PCTECORR --------- - -This keyword turns on the CTE (charge transfer efficiency) -correction which is compute intensive processing. Only applies to ACS and WFC3. - -- `pctecorr = 1` : PERFORM -- `pctecorr = 0` : OMIT, NaN - - -SUBARRAY --------- - -The subarray readouts will be smaller than the full-frame -images and will process faster. - -- `subarray = 1` : T -- `subarray = 0` : F - ---- - -CRSPLIT -------- - -2 (or at least a value greater than 1) - -This indicates multiple images to be used for cosmic -ray rejection, so multiple input images will be open -at the same time for processing. - -- `crsplit = 0` -- `crsplit = 1` -- `crsplit = 2` - - -N_FILES (XFILES) ----------------- - -Total number of raw input files used in calibration. 
This feature is normalized and scaled into zero mean and unit variance values (`xfiles`) in order to stabilize variance and minimize skewness of the distribution. - -TOTAL_MB (XSIZE) ----------------- - -Total size in megabytes of all raw files used in calibration. This feature is normalized and scaled into zero mean and unit variance values (`xsize`) in order to stabilize variance and minimize skewness of the distribution. diff --git a/docs/source/skopes/hst/cal/index.rst b/docs/source/skopes/hst/cal/index.rst new file mode 100644 index 0000000..140de0f --- /dev/null +++ b/docs/source/skopes/hst/cal/index.rst @@ -0,0 +1,190 @@ +*********************** +spacekit.skopes.hst.cal +*********************** + +Spacekit HST "Calibration in the Cloud" (calcloud) Job Resource Allocation Model Training + +.. currentmodule:: spacekit.skopes.hst.cal + +.. toctree:: + :maxdepth: 1 + + predict + train + +This script imports and preprocesses job metadata for the Hubble Space Telescope data calibration pipeline, +which is then used as inputs to build, train and evaluate 3 neural networks for estimating AWS batch compute job resource requirements. + +The networks include one multi-class classifier and two linear regression estimators. The classifier predicts which of 4 possible memory bin sizes (and therefore compute instance type) is most appropriate for reprocessing a given ipppssoot (i.e. "job"). The wallclock regressor estimates the maximum execution time ("wallclock" or "kill" time) in seconds needed to complete the job. + +Memory Bin Classifier +--------------------- + +Allocating a job to a memory bin higher than required leads to unnecessary cost increases (because AWS billing tiers are directly related to compute instance types). However, if the estimated memory allocation is too low, the job will fail and have to be re-submitted at the next highest tier, also leading to increased costs. 
The majority of data being reprocessed for HST falls in the <2GB range, with only a handful in the highest tier (16-64GB). + +Memory Regressor +---------------- + +Essentially identical to the classifier, the difference being that it returns a precise estimation value for memory in GB rather than a class. This is not needed for the pipeline (because it only needs to know which bin size) but we decided to keep the model for reference and analysis purposes. + +Wallclock Regressor +------------------- + +Estimates maximum execution or "kill" time in seconds it will take to complete the job. AWS Batch requires a minimum threshold of 60 seconds, with a large proportion of jobs completing below the one minute mark. Some jobs can take hours or even days to complete - if a job fails in memory after running for 24 hours, it would have to be re-submitted (huge cost). Likewise, if a job is allocated appropriate memory size but fails prematurely because it passes the maximum wallclock threshold, it would have to be resubmitted at a higher time allocation (huge cost). The relationship between memory needs and wallclock time is not linear, hence why there is a need for two separate models. + + +Ex: + +.. code-block:: bash + + python -m spacekit.skopes.hst.cal.train data/2021-11-04-1636048291 + +using this script produces a specific file output structure which can optionally be compressed and uploaded to s3. +The dataset used for training is updated with model predictions and can optionally be stored in a dynamodb table. + + +.. 
code-block:: python + + """ + |- data/2021-11-04-1636048291 + |- data + |- latest.csv + |- models + |- tx_data.json + |- mem_clf + |- {saved model binaries} + |- mem_reg + |- {saved model binaries} + |- wall_reg + |- {saved model binaries} + |- results + |- mem_bin {standard classifier filenames} + |- acc_loss + |- cmx + |- cmx_norm + |- fnfp + |- history + |- report + |- roc_auc + |- test_idx + |- y_onehot + |- y_pred + |- y_scores + |- memory {standard regressor filenames} + |- history + |- loss + |- predictions + |- residuals + |- test_idx + |- wallclock {standard regressor filenames} + |- history + |- loss + |- predictions + |- residuals + |- test_idx + """ + +examples: + +download data from DynamoDB: + +~/data/timestamp + +upload to DDB: trained dataset with predictions CSV + +~/data/timestamp/data/latest.csv + +upload to s3: trained dataset with predictions CSV, results and models + +~/data/timestamp/ + + +Loading results from disk in a separate session +----------------------------------------------- + +To load results from disk in a separate session (for plotting, analysis etc): + +.. code-block:: python + + bcom = ComputeMulti(res_path=f"{res_path}/mem_bin") + bin_out = bcom.upload() + bcom.load_results(bin_out) + test_idx = bin_out["test_idx"] + + + +Calcloud Job Predict Data Dictionary +------------------------------------ + + +DETECTOR +-------- + +**ACS** + +* WFC (versus HRC or SBC) mosaic of two 2048 x 4096 pixels +* HRC and SBC are on 1024 x 1024 + +**WFC3** + +* UVIS is a mosaic of two 4096 x 2051 pixels +* IR is 1024 x 1024 +* could also use "NAXIS1" and "NAXIS2" which should reflect the size of the images. + +- `detector = 1` : WFC, UVIS +- `detector = 0` : IR, HRC, SBC, et al + + +DRIZCORR +-------- + +This will run the drizzling step which can take +a bit of time. Only applies to ACS and WFC3, so values for other instruments will show as NaN and be converted to zero (false). 
+ +- `drizcorr = 1` : PERFORM +- `drizcorr = 0` : OMIT, NaN + +PCTECORR +-------- + +This keyword turns on the CTE (charge transfer efficiency) +correction which is compute intensive processing. Only applies to ACS and WFC3. + +- `pctecorr = 1` : PERFORM +- `pctecorr = 0` : OMIT, NaN + + +SUBARRAY +-------- + +The subarray readouts will be smaller than the full-frame +images and will process faster. + +- `subarray = 1` : T +- `subarray = 0` : F + +--- + +CRSPLIT +------- + +2 (or at least a value greater than 1) + +This indicates multiple images to be used for cosmic +ray rejection, so multiple input images will be open +at the same time for processing. + +- `crsplit = 0` +- `crsplit = 1` +- `crsplit = 2` + + +N_FILES (XFILES) +---------------- + +Total number of raw input files used in calibration. This feature is normalized and scaled into zero mean and unit variance values (`xfiles`) in order to stabilize variance and minimize skewness of the distribution. + +TOTAL_MB (XSIZE) +---------------- + +Total size in megabytes of all raw files used in calibration. This feature is normalized and scaled into zero mean and unit variance values (`xsize`) in order to stabilize variance and minimize skewness of the distribution. diff --git a/docs/source/skopes/hst/cal/predict.rst b/docs/source/skopes/hst/cal/predict.rst index 41f3316..9752659 100644 --- a/docs/source/skopes/hst/cal/predict.rst +++ b/docs/source/skopes/hst/cal/predict.rst @@ -2,6 +2,7 @@ spacekit.skopes.hst.cal.predict ******************************* + .. currentmodule:: spacekit.skopes.hst.cal.predict .. automodule:: spacekit.skopes.hst.cal.predict diff --git a/docs/source/skopes/hst/cal/train.rst b/docs/source/skopes/hst/cal/train.rst index ef923d6..1ad947c 100644 --- a/docs/source/skopes/hst/cal/train.rst +++ b/docs/source/skopes/hst/cal/train.rst @@ -2,101 +2,8 @@ spacekit.skopes.hst.cal.train ***************************** + .. currentmodule:: spacekit.skopes.hst.cal.train .. 
automodule:: spacekit.skopes.hst.cal.train - :members: - - -Spacekit HST "Calibration in the Cloud" (calcloud) Job Resource Allocation Model Training - -This script imports and preprocesses job metadata for the Hubble Space Telescope data calibration pipeline, -which is then used as inputs to build, train and evaluate 3 neural networks for estimating AWS batch compute job resource requirements. - -The networks include one multi-class classifier and two linear regression estimators. The classifier predicts which of 4 possible memory bin sizes (and therefore compute instance type) is most appropriate for reprocessing a given ipppssoot (i.e. "job"). The wallclock regressor estimates the maximum execution time ("wallclock" or "kill" time) in seconds needed to complete the job. - -Memory Bin Classifier ---------------------- -Allocating a job to a memory bin higher than required leads to unnecessary cost increases (because AWS billing tiers are directly related to compute instance types). However, if the estimated memory allocation is too low, the job will fail and have to be re-submitted at the next highest tier, also leading to increased costs. The majority of data being reprocessed for HST falls in the <2GB range, with only a handful in the highest tier (16-64GB). - -Memory Regressor ----------------- -Essentially identical to the classifier, the difference being that it returns a precise estimation value for memory in GB rather than a class. This is not needed for the pipeline (because it only needs to know which bin size) but we decided to keep the model for reference and analysis purposes. - -Wallclock Regressor -------------------- -Estimates maximum execution or "kill" time in seconds it will take to complete the job. AWS Batch requires a minimum threshold of 60 seconds, with a large proportion of jobs completing below the one minute mark. 
Some jobs can take hours or even days to complete - if a job fails in memory after running for 24 hours, it would have to be re-submitted (huge cost). Likewise, if a job is allocated appropriate memory size but fails prematurely because it passes the maximum wallclock threshold, it would have to be resubmitted at a higher time allocation (huge cost). The relationship between memory needs and wallclock time is not linear, hence why there is a need for two separate models. - -Ex: -python -m spacekit.skopes.hst.cal.train data/2021-11-04-1636048291 - -using this script produces a specific file output structure which can optionally be compressed and uploaded to s3. -The dataset used for training is updated with model predictions and can optionally be stored in a dynamodb table. - - -.. code-block:: python - - """ - |- data/2021-11-04-1636048291 - |- data - |- latest.csv - |- models - |- tx_data.json - |- mem_clf - |- {saved model binaries} - |- mem_reg - |- {saved model binaries} - |- wall_reg - |- {saved model binaries} - |- results - |- mem_bin {standard classifier filenames} - |- acc_loss - |- cmx - |- cmx_norm - |- fnfp - |- history - |- report - |- roc_auc - |- test_idx - |- y_onehot - |- y_pred - |- y_scores - |- memory {standard regressor filenames} - |- history - |- loss - |- predictions - |- residuals - |- test_idx - |- wallclock {standard regressor filenames} - |- history - |- loss - |- predictions - |- residuals - |- test_idx - """ - -examples: - -download data from DynamoDB: - -~/data/timestamp - -upload to DDB: trained dataset with predictions CSV - -~/data/timestamp/data/latest.csv - -upload to s3: trained dataset with predictions CSV, results and models - -~/data/timestamp/ - -Loading results from disk in a separate session ------------------------------------------------ - -To load results from disk in a separate session (for plotting, analysis etc): - -.. 
code-block:: python - - > bcom = ComputeMulti(res_path=f"{res_path}/mem_bin") - > bin_out = bcom.upload() - > bcom.load_results(bin_out) - > test_idx = bin_out["test_idx"] + :members: \ No newline at end of file diff --git a/docs/source/skopes/hst/index.rst b/docs/source/skopes/hst/index.rst new file mode 100644 index 0000000..b67a056 --- /dev/null +++ b/docs/source/skopes/hst/index.rst @@ -0,0 +1,13 @@ +******************* +spacekit.skopes.hst +******************* + +hubble space telescope machine learning api + +.. currentmodule:: spacekit.skopes.hst + +.. toctree:: + :maxdepth: 1 + + Single Visit Mosaic Alignment Modeling + Calcloud Data Pipeline Modeling diff --git a/docs/source/skopes/hst/svm/corrupt.rst b/docs/source/skopes/hst/svm/corrupt.rst index b5cdcea..783e2f3 100644 --- a/docs/source/skopes/hst/svm/corrupt.rst +++ b/docs/source/skopes/hst/svm/corrupt.rst @@ -1,3 +1,8 @@ +.. _corrupt + ******************************* spacekit.skopes.hst.svm.corrupt ******************************* + +.. currentmodule:: spacekit.skopes.hst.svm.corrupt + diff --git a/docs/source/skopes/hst/svm.rst b/docs/source/skopes/hst/svm/index.rst similarity index 87% rename from docs/source/skopes/hst/svm.rst rename to docs/source/skopes/hst/svm/index.rst index d39d39b..3429e51 100644 --- a/docs/source/skopes/hst/svm.rst +++ b/docs/source/skopes/hst/svm/index.rst @@ -1,6 +1,12 @@ -************************************************* -spacekit - single visit mosaic alignment modeling -************************************************* +.. _svm + +*********************** +spacekit.skopes.hst.svm +*********************** + +single visit mosaic alignment modeling + +.. currentmodule:: spacekit.skopes.hst.svm How-To ------- @@ -10,12 +16,12 @@ How-To * Generate Synthetic Corruption Images (artificial corruption data) .. 
toctree:: - :maxdepth: 2 + :maxdepth: 1 - prep - predict - train - corrupt + prep + predict + train + corrupt Background Summary ------------------ @@ -53,7 +59,8 @@ Dataset --- -## Setup +Setup +----- **Install with pip** @@ -69,22 +76,24 @@ Dataset $ pip install -e . -## Run +Run +--- **Example: HST Single Visit Mosaic Alignment Classification** -### Classify new data using pre-trained model: +Classify new data using pre-trained model: + 1. Preprocess data (scrape from regression test json and fits files, scrub/preprocess dataframe, generate png images for ML) -***from the command line*** +**from the command line** .. code-block:: bash $ python -m spacekit.skopes.hst.svm.prep path/to/svmdata -f=svm_data.csv -***from python*** +**from python** .. code-block:: python @@ -104,14 +113,14 @@ Outputs: 2. Generate predictions -***from the command line*** +**from the command line** .. code-block:: bash $ python -m spacekit.skopes.hst.svm.predict svm_data.csv img -***from python*** +**from python** .. code-block:: python @@ -131,11 +140,12 @@ Outputs: ---- -### Build, train, evaluate new classifier from labeled data +Build, train, evaluate new classifier from labeled data +------------------------------------------------------- Run step 1 (prep) above, then: -***from the command line*** +**from the command line** .. code-block:: bash @@ -143,7 +153,7 @@ Run step 1 (prep) above, then: $ python -m spacekit.skopes.hst.svm.train svm_data.csv img -***from Python*** +**from Python** .. code-block:: python diff --git a/docs/source/skopes/hst/svm/predict.rst b/docs/source/skopes/hst/svm/predict.rst index f656184..3143d33 100644 --- a/docs/source/skopes/hst/svm/predict.rst +++ b/docs/source/skopes/hst/svm/predict.rst @@ -1,7 +1,10 @@ +.. _predict + ******************************* spacekit.skopes.hst.svm.predict ******************************* + .. currentmodule:: spacekit.skopes.hst.svm.predict .. 
automodule:: spacekit.skopes.hst.svm.predict diff --git a/docs/source/skopes/hst/svm/prep.rst b/docs/source/skopes/hst/svm/prep.rst index cb5650b..cd33654 100644 --- a/docs/source/skopes/hst/svm/prep.rst +++ b/docs/source/skopes/hst/svm/prep.rst @@ -1,7 +1,10 @@ +.. _prep + **************************** spacekit.skopes.hst.svm.prep **************************** + .. currentmodule:: spacekit.skopes.hst.svm.prep .. automodule:: spacekit.skopes.hst.svm.prep diff --git a/docs/source/skopes/hst/svm/train.rst b/docs/source/skopes/hst/svm/train.rst index 7732e6b..19f2733 100644 --- a/docs/source/skopes/hst/svm/train.rst +++ b/docs/source/skopes/hst/svm/train.rst @@ -1,7 +1,10 @@ +.. _train + ***************************** spacekit.skopes.hst.svm.train ***************************** + .. currentmodule:: spacekit.skopes.hst.svm.train .. automodule:: spacekit.skopes.hst.svm.train diff --git a/docs/source/skopes/index.rst b/docs/source/skopes/index.rst new file mode 100644 index 0000000..fee37f4 --- /dev/null +++ b/docs/source/skopes/index.rst @@ -0,0 +1,17 @@ +.. _skopes + +*************** +spacekit.skopes +*************** + +Machine Learning Applications +============================= + +.. toctree:: + :maxdepth: 2 + + JWST + + HST + + K2/Kepler diff --git a/docs/source/skopes/jwst.rst b/docs/source/skopes/jwst.rst deleted file mode 100644 index 586e1c0..0000000 --- a/docs/source/skopes/jwst.rst +++ /dev/null @@ -1,9 +0,0 @@ -***************************************************** -spacekit.skopes.jwst - james webb space telescope api -***************************************************** - - -.. 
toctree:: - :maxdepth: 1 - - Calibration Pipeline Resource Prediction Modeling diff --git a/docs/source/skopes/jwst/cal.rst b/docs/source/skopes/jwst/cal/index.rst similarity index 66% rename from docs/source/skopes/jwst/cal.rst rename to docs/source/skopes/jwst/cal/index.rst index 4836d4d..901e1fd 100644 --- a/docs/source/skopes/jwst/cal.rst +++ b/docs/source/skopes/jwst/cal/index.rst @@ -1,16 +1,20 @@ -***************************************************************** -spacekit - JWST calibration pipeline resource prediction modeling -***************************************************************** +************************ +spacekit.skopes.jwst.cal +************************ -Inference ---------- +Resource Estimation for the JWST Calibration Pipeline +----------------------------------------------------- + +Inference: Generate estimated memory footprints on unlabeled data. + +Training: Build and train machine learning models for Image and Spec Level 3 pipelines -Generate estimated memory footprints on unlabeled data. .. toctree:: :maxdepth: 2 - predict + predict + train Setup @@ -33,14 +37,14 @@ Setup Run Inference ------------- -***from the command line*** +**from the command line** .. code-block:: bash $ python -m spacekit.skopes.jwst.cal.predict /path/to/inputs -***from python*** +**from python** .. code-block:: python diff --git a/docs/source/skopes/jwst/cal/predict.rst b/docs/source/skopes/jwst/cal/predict.rst index 804e692..991f209 100644 --- a/docs/source/skopes/jwst/cal/predict.rst +++ b/docs/source/skopes/jwst/cal/predict.rst @@ -4,6 +4,10 @@ spacekit.skopes.jwst.cal.predict ******************************** +.. toctree:: + :maxdepth: 1 + + .. currentmodule:: spacekit.skopes.jwst.cal.predict .. automodule:: spacekit.skopes.jwst.cal.predict diff --git a/docs/source/skopes/jwst/cal/train.rst b/docs/source/skopes/jwst/cal/train.rst new file mode 100644 index 0000000..c19997a --- /dev/null +++ b/docs/source/skopes/jwst/cal/train.rst @@ -0,0 +1,13 @@ +.. 
_train: + +****************************** +spacekit.skopes.jwst.cal.train +****************************** + +.. toctree:: + :maxdepth: 1 + +.. .. currentmodule:: spacekit.skopes.jwst.cal.train + +.. .. automodule:: spacekit.skopes.jwst.cal.train +.. :members: diff --git a/docs/source/skopes/jwst/index.rst b/docs/source/skopes/jwst/index.rst new file mode 100644 index 0000000..128dfa1 --- /dev/null +++ b/docs/source/skopes/jwst/index.rst @@ -0,0 +1,13 @@ +******************** +spacekit.skopes.jwst +******************** + +JWST Machine Learning API +========================= + +.. toctree:: + :maxdepth: 1 + + Calibration Pipeline Resource Prediction Modeling <cal/index> + +.. currentmodule:: spacekit.skopes.jwst \ No newline at end of file diff --git a/docs/source/skopes/kepler/index.rst b/docs/source/skopes/kepler/index.rst new file mode 100644 index 0000000..7066148 --- /dev/null +++ b/docs/source/skopes/kepler/index.rst @@ -0,0 +1,10 @@ +********************** +spacekit.skopes.kepler +********************** + +.. currentmodule:: spacekit.skopes.kepler + +.. toctree:: + :maxdepth: 2 + + light_curves diff --git a/docs/source/skopes/kepler/light-curves.rst b/docs/source/skopes/kepler/light-curves.rst deleted file mode 100644 index 9b77eb4..0000000 --- a/docs/source/skopes/kepler/light-curves.rst +++ /dev/null @@ -1,3 +0,0 @@ -********************************** -spacekit - kepler light-curves api -********************************** \ No newline at end of file diff --git a/docs/source/skopes/kepler/light_curves.rst b/docs/source/skopes/kepler/light_curves.rst new file mode 100644 index 0000000..6955df7 --- /dev/null +++ b/docs/source/skopes/kepler/light_curves.rst @@ -0,0 +1,7 @@ +.. _light_curves: + +*********************************** +spacekit.skopes.kepler.light_curves +*********************************** + +.. 
currentmodule:: spacekit.skopes.kepler.light_curves \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index 2490ac8..6d7f3f7 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,11 +53,12 @@ test = ruff docs = sphinx - sphinx_rtd_theme + furo pydot graphviz sphinx-automodapi numpydoc + sphinx-autobuild x = astroquery matplotlib<4 diff --git a/spacekit/preprocessor/transform.py b/spacekit/preprocessor/transform.py index d7f7732..bea11c9 100644 --- a/spacekit/preprocessor/transform.py +++ b/spacekit/preprocessor/transform.py @@ -209,7 +209,7 @@ def get_pixel_offsets(self, exp_data): refpix["t_offset"] = self.pixel_sky_separation( refpix["TARG_RA"], refpix["TARG_DEC"], pcoord, refpix["scale"] ) - except ValueError: + except (ValueError, TypeError): self.log.debug("TARG/GS RA DEC vals missing or NaN - setting to 0.0") return refpix