diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/build_docs.yaml b/.github/workflows/build_docs.yaml new file mode 100644 index 0000000..383b484 --- /dev/null +++ b/.github/workflows/build_docs.yaml @@ -0,0 +1,71 @@ +name: Build the documentation + +on: + push: + branches: + - main + +permissions: + contents: write + +jobs: + build-docs: + concurrency: ci-${{ github.ref }} + name: Build docs (${{ matrix.python-version }}, ${{ matrix.os }}) + runs-on: ${{ matrix.os }} + defaults: + run: + shell: bash -l {0} + strategy: + matrix: + os: ["ubuntu-latest"] + python-version: ["3.10"] + + steps: + # Grab the latest commit from the branch + - name: Checkout the branch + uses: actions/checkout@v3.5.2 + with: + persist-credentials: false + + # Create a virtual environment + - name: create Conda environment + uses: conda-incubator/setup-miniconda@v2 + with: + auto-update-conda: true + python-version: ${{ matrix.python-version }} + + # Install KaTeX for math support + - name: Install NPM + uses: actions/setup-node@v3 + with: + node-version: 16 + - name: Install KaTeX + run: | + npm install katex + # Install Poetry and build the documentation + - name: Install and configure Poetry + uses: snok/install-poetry@v1 + with: + version: 1.2.2 + virtualenvs-create: false + virtualenvs-in-project: false + installer-parallel: true + + - name: Install LaTex + run: | + sudo apt-get update + sudo apt-get install texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra dvipng cm-super + + - name: Build the documentation with MKDocs + run: | + cp docs/examples/gpjax.mplstyle . 
+ poetry install --all-extras --with docs + conda install pandoc + poetry run mkdocs build + + - name: Deploy Page 🚀 + uses: JamesIves/github-pages-deploy-action@v4.4.1 + with: + branch: gh-pages + folder: site \ No newline at end of file diff --git a/README.md b/README.md index f37470b..6674355 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,177 @@ -# A minimal library for preprocessing remote sensing data for machine learning applications (In Progress) -[![CodeFactor](https://www.codefactor.io/repository/github/jejjohnson/rs_tools/badge)](https://www.codefactor.io/repository/github/jejjohnson/rs_tools) -[![codecov](https://codecov.io/gh/jejjohnson/rs_tools/branch/main/graph/badge.svg?token=YGPQQEAK91)](https://codecov.io/gh/jejjohnson/rs_tools) +# `rs-tools` -> This package has some simple, minimal preprocessing of helio-data to make it machine learning ready. +## What are RS-Tools? +`rs_tools` is a toolbox of functions designed to preprocess remote sensing data for machine learning applications. +There is a high barrier to entry when working with remote sensing data for machine learning (ML) research. +This is especially true for level 1 data, which is typically raw radiance observations. +There are often many domain-specific transformations that can completely make or break the success of the ML task. +`rs_tools` seeks to lower the barrier to entry for ML researchers to make meaningful progress when dealing with remote sensing data. +It features a standardized, transparent and flexible procedure for defining data and evaluation pipelines for data-intensive level 1 data products. ---- +*** +### Agnostic Toolbox of Functions + +We provide a suite of useful functions which can be used to clean level-1 remote sensing data to be used for downstream tasks. +It is an agnostic suite of functions that can be piped together to create preprocessing and evaluation chains. +We take care of all of the nitty-gritty details which are often common for these types of datasets. 
+However, we take care not to hard-code anything and try to be as transparent as possible so that users can understand and modify the scripts for their own use cases. + +*** +### Pipelines + +We provide some Hydra-integrated pipelines which allow users to do some high-level processing to produce ML-ready datasets. +We follow best practices to be as agnostic as possible so that users are not bound by any ML framework. +In addition, we provide many small bite-sized functions which users can piece together in their own way for their own applications. + + +*** +#### Data Downloader + +With a few simple commands, we can download some raw level 1 data products with minimal preprocessing. +We currently have data downloaders for [MODIS Level 1](https://spaceml-org.github.io/rs_tools/datasets/modis) data, [MSG Level 1](https://spaceml-org.github.io/rs_tools/datasets/msg) data, and [GOES16 Level 1](https://spaceml-org.github.io/rs_tools/datasets/goes/) data. + + +A user can get started right away by simply running the following snippet on the command line. + +```bash +# GOES 16 +python rs_tools satellite=goes stage=download +# MODIS - AQUA (or TERRA) +python rs_tools satellite=aqua stage=download +# MSG +python rs_tools satellite=msg stage=download +``` + + +*** +#### Analysis-Ready Data + +We have scripts to generate some *analysis-ready data*. +These are datasets that have been harmonized under a common data structure. +We try to keep as much meta-data as possible which could be useful for downstream tasks, e.g., coordinates, time stamps, units and cloud masks. +A user can do some further analysis on these + 
<xarray.Dataset> Size: 10MB\n", + "Dimensions: (x: 302, y: 207, time: 1, band_wavelength: 16, band: 16)\n", + "Coordinates: (8)\n", + "Data variables: (2)\n", + "Attributes: (12/30)\n", + " naming_authority: gov.nesdis.noaa\n", + " Conventions: CF-1.7\n", + " standard_name_vocabulary: CF Standard Name Table (v35, 20 July 2016)\n", + " institution: DOC/NOAA/NESDIS > U.S. Department of Commerce,...\n", + " project: GOES\n", + " production_site: RBU\n", + " ... ...\n", + " timeline_id: ABI Mode 6\n", + " date_created: 2020-10-01T15:09:56.5Z\n", + " time_coverage_start: 2020-10-01T15:00:19.6Z\n", + " time_coverage_end: 2020-10-01T15:09:50.4Z\n", + " LUT_Filenames: SpaceLookParams(FM1A_CDRL79RevP_PR_09_00_02)-6...\n", + " id: ae981973-758f-4213-b71e-e619d91ddddb
<xarray.DataArray (variable: 2, band: 16, y: 64, x: 64)> Size: 524kB\n", + "60.15 62.58 94.26 103.2 103.2 113.7 112.9 112.9 ... nan nan nan nan nan nan nan\n", + "Coordinates: (9)\n", + "Attributes: (12/30)\n", + " naming_authority: gov.nesdis.noaa\n", + " Conventions: CF-1.7\n", + " standard_name_vocabulary: CF Standard Name Table (v35, 20 July 2016)\n", + " institution: DOC/NOAA/NESDIS > U.S. Department of Commerce,...\n", + " project: GOES\n", + " production_site: RBU\n", + " ... ...\n", + " timeline_id: ABI Mode 6\n", + " date_created: 2020-10-01T15:09:56.5Z\n", + " time_coverage_start: 2020-10-01T15:00:19.6Z\n", + " time_coverage_end: 2020-10-01T15:09:50.4Z\n", + " LUT_Filenames: SpaceLookParams(FM1A_CDRL79RevP_PR_09_00_02)-6...\n", + " id: ae981973-758f-4213-b71e-e619d91ddddb