diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index a7f6ad61..57640182 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -26,4 +26,4 @@ solve the problem. - Other details about your setup that could be relevant # Additional context -Add any other context about the problem here. +Add any other context about the problem here, including expected behaviour. diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md index 20730861..da43edc7 100644 --- a/.github/ISSUE_TEMPLATE/question.md +++ b/.github/ISSUE_TEMPLATE/question.md @@ -15,5 +15,5 @@ the full context of your question. ## Configuration - OS: [e.g. Hal] - - Version [e.g. Python 3.47] + - Version: [e.g. Python 3.47] - Other details about your setup that could be relevant diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 5fa8548d..24fa713d 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -1,6 +1,6 @@ # Description -Addresses # (issue) +Addresses #(issue) Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. 
List any dependencies that are required @@ -47,6 +47,7 @@ Test B - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged and published in downstream modules - [ ] Add a note to ``CHANGELOG.md``, summarizing the changes +- [ ] Update zenodo.json file for new code contributors If this is a release PR, replace the first item of the above checklist with the release checklist on the pysat wiki: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 0c024531..8cf4ccf0 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -1,5 +1,6 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# This workflow will install Python dependencies, run tests and lint with a +# variety of Python versions. For more information see: +# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Documentation Check @@ -8,24 +9,21 @@ on: [push, pull_request] jobs: build: - runs-on: ubuntu-latest + runs-on: ["ubuntu-latest"] strategy: fail-fast: false matrix: - python-version: [3.9] + python-version: ["3.11"] name: Documentation tests steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r test_requirements.txt - pip install -r requirements.txt + - name: Install with dependencies + run: pip install .[doc] - name: Set up pysat run: | diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e9cd42cc..57653141 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -12,35 +12,47 @@ 
jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest] # TODO(76): add windows-latest - python-version: ["3.9", "3.10"] - numpy_ver: ["latest"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.10", "3.11"] + test_config: ["latest"] include: - - python-version: "3.8" - numpy_ver: "1.21" + # NEP29 compliance settings + - python-version: "3.9" + numpy_ver: "1.23" os: ubuntu-latest + test_config: "NEP29" + # Operational compliance settings - python-version: "3.6.8" numpy_ver: "1.19.5" os: "ubuntu-20.04" + test_config: "Ops" - name: Python ${{ matrix.python-version }} on ${{ matrix.os }} with numpy ${{ matrix.numpy_ver }} + name: ${{ matrix.test_config }} Python ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - - name: Install standard dependencies + - name: Install Operational dependencies + if: ${{ matrix.test_config == 'Ops'}} run: | + pip install numpy==${{ matrix.numpy_ver }} + pip install -r requirements.txt pip install -r test_requirements.txt - pip install -r requirements.txt; + pip install . - name: Install NEP29 dependencies - if: ${{ matrix.numpy_ver != 'latest'}} + if: ${{ matrix.test_config == 'NEP29'}} run: | pip install numpy==${{ matrix.numpy_ver }} + pip install --upgrade-strategy only-if-needed .[test] + + - name: Install standard dependencies + if: ${{ matrix.test_config == 'latest'}} + run: pip install .[test] - name: Set up pysat run: | @@ -54,9 +66,22 @@ jobs: run: flake8 . 
--count --exit-zero --max-complexity=10 --statistics - name: Test with pytest - run: pytest -x --cov=pysatMadrigal/ + run: pytest - name: Publish results to coveralls env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: coveralls --rcfile=setup.cfg --service=github + COVERALLS_PARALLEL: true + run: coveralls --rcfile=pyproject.toml --service=github + + finish: + name: Finish Coverage Analysis + needs: build + runs-on: ubuntu-latest + steps: + - name: Coveralls Finished + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + pip install --upgrade coveralls + coveralls --service=github --finish diff --git a/.github/workflows/pip_rc_install.yml b/.github/workflows/pip_rc_install.yml new file mode 100644 index 00000000..ca7b8a68 --- /dev/null +++ b/.github/workflows/pip_rc_install.yml @@ -0,0 +1,41 @@ +# This workflow will install Python dependencies and the latest RC of +# pysatMadrigal from test pypi. This test should be manually run before an RC is +# officially approved and versioned. 
For more information see: +# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Test install of latest RC from pip + +on: [workflow_dispatch] + +jobs: + build: + strategy: + fail-fast: false + matrix: + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.12"] # Keep this version at the highest supported Python version + + name: Python ${{ matrix.python-version }} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install standard dependencies + run: pip install -r requirements.txt + + - name: Install pysatMadrigal RC + run: pip install --no-deps --pre -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ pysatMadrigal + + - name: Set up pysat + run: | + mkdir pysatData + python -c "import pysat; pysat.params['data_dirs'] = 'pysatData'" + + - name: Check that install imports correctly + run: | + cd .. + python -c "import pysatMadrigal; print(pysatMadrigal.__version__)" diff --git a/.github/workflows/pysat_rc.yml b/.github/workflows/pysat_rc.yml index 2e4ce4ea..3b7a80f7 100644 --- a/.github/workflows/pysat_rc.yml +++ b/.github/workflows/pysat_rc.yml @@ -1,5 +1,6 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# This workflow will install Python dependencies, run tests and lint with a +# variety of Python versions. 
For more information see: +# https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Test with latest pysat RC @@ -10,26 +11,23 @@ jobs: strategy: fail-fast: false matrix: -# TODO(#76): add windows tests when bugs are sorted - os: [ubuntu-latest, macos-latest] - python-version: ["3.10"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] + python-version: ["3.11"] name: Python ${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install pysat RC - run: pip install --no-deps -i https://test.pypi.org/simple/ pysat + run: pip install --pre -i https://test.pypi.org/simple/ --extra-index-url https://pypi.org/simple/ pysat - - name: Install standard dependencies - run: | - pip install -r requirements.txt - pip install -r test_requirements.txt + - name: Install standard dependencies and package + run: pip install .[test] - name: Set up pysat run: | @@ -37,9 +35,22 @@ jobs: python -c "import pysat; pysat.params['data_dirs'] = 'pysatData'" - name: Test with pytest - run: pytest -vs -x --cov=pysatMadrigal/ + run: pytest - name: Publish results to coveralls env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - run: coveralls --rcfile=setup.cfg --service=github + COVERALLS_PARALLEL: true + run: coveralls --rcfile=pyproject.toml --service=github + + finish: + name: Finish Coverage Analysis + needs: build + runs-on: ubuntu-latest + steps: + - name: Coveralls Finished + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + pip install --upgrade coveralls + coveralls --service=github --finish diff --git a/.readthedocs.yml b/.readthedocs.yml new file mode 100644 index 00000000..b198cf86 --- /dev/null +++ b/.readthedocs.yml @@ -0,0 +1,25 @@ +# .readthedocs.yml +# Read the 
Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required version of readthedocs +version: 2 + +# Set the version of Python and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.11" + +# Build documentation in the docs/ directory with Sphinx +sphinx: + configuration: docs/conf.py + + +# Declare the Python requirements required to build your docs +python: + install: + - method: pip + path: . + extra_requirements: + - doc diff --git a/ACKNOWLEDGEMENTS.md b/ACKNOWLEDGEMENTS.md new file mode 100644 index 00000000..9d124013 --- /dev/null +++ b/ACKNOWLEDGEMENTS.md @@ -0,0 +1,31 @@ +Funding +======= +The following institutions, missions, and programs have provided funding +for pysatMadrigal development. + +Institutions +------------ + - The Catholic University of America (CUA) + - Cosmic Studio + - Defense Advanced Research Projects Agency (DARPA) Defense Sciences Office + - National Aeronautics and Space Administration (NASA) + - National Science Foundation (NSF) + - Office of Naval Research (ONR) + +Programs +-------- + - NSF 125908, AGS-1651393 + - Naval Research Laboratory N00173191G016 and N0017322P0744 + +Disclaimers +=========== +Any opinions or actions taken by the listed funding institutions are those of +the institutions and do not necessarily reflect the views of the pysat development +team or individual authors. Any opinions, findings, and conclusions or recommendations +expressed in this material are those of the author(s) and do not necessarily reflect the views +of the funding agencies. + +Support and Thanks +================== +Thanks to Bill Rideout, who provided help when learning how to use the +madrigalWeb package. diff --git a/CHANGELOG.md b/CHANGELOG.md index 36c26b87..799fb67e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,24 @@ Change Log All notable changes to this project will be documented in this file. 
This project adheres to [Semantic Versioning](https://semver.org/). +[0.2.0] - 2024-03-15 +-------------------- +* Enhancements + * Moved the OMNI-2 IMF, Dst, and Geomagnetic index Instruments from the + general Madrigal Pandas instrument into new ones + * Moved the NGDC AE index Instrument from the general Madrigal Pandas + instrument to a new one, fixing the Windows memory issue and a problem + with duplicated times + * Added slant TEC (tag of 'los') to the gnss_tec Instrument + * Refactored general load function to extract useful parts of the code that + were used for specific load functions +* Maintenance + * Add manual GitHub Actions tests for pysatMadrigal RC + * Update GitHub Actions workflows for newer versions of pip, updated actions + * Added clean warning test attributes to all Instruments + * Updated documentation to comply with current Ecosystem guidelines + * Replaced setup.py with pyproject.toml + [0.1.0] - 2023-04-11 -------------------- * Enhancements @@ -16,6 +34,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). between '.' delimiters, required for some Madrigal file formats * Standardized the Instrument method kwarg defaults * Added 'site' tag to the GNSS TEC Instrument + * Added a 'dmsp_ssj' instrument for Auroral Boundary Index data * Added support for varied use of `two_digit_year_break` to `methods.general.list_remote_files` * Implemented `two_digit_year_break` support for `vtec` GNSS TEC Instrument @@ -29,11 +48,13 @@ This project adheres to [Semantic Versioning](https://semver.org/). 
* Added quick-fail for main pytest command * Bug * Fixed bugs in the coordinate conversion functions + * Fixed bug in the general download function that sets the stop date * Maintenance * Updated GitHub action and NEP29 versions * Updated the minimum Madrigal version to allow HDF4 downloads * Update pysat instrument testing suite, pytest syntax * Add manual GitHub Actions tests for pysat RC + * Removed code needed to work around pysat bugs [0.0.4] - 2021-06-11 -------------------- diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 483ea9ac..ee4a9e76 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -56,9 +56,9 @@ project may be further defined and clarified by project maintainers. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at pysat.developers@gmail.com. The -project team will review and investigate all complaints, and will respond in a -way that it deems appropriate to the circumstances. The project team is -obligated to maintain confidentiality with regard to the reporter of an +pysatMadrigal project team will review and investigate all complaints, and will +respond in a way that it deems appropriate to the circumstances. The project +team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. @@ -69,7 +69,14 @@ members of the project's leadership. 
## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 1.4, available at [https://contributor-covenant.org/version/1/4][version] +version 1.4, available at +[https://contributor-covenant.org/version/1/4][version] + +## FAQ + +For answers to common questions about this code of conduct, see +[https://www.contributor-covenant.org/faq][faq] [homepage]: https://contributor-covenant.org [version]: https://contributor-covenant.org/version/1/4/ +[faq]: https://www.contributor-covenant.org/faq diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index a81865e6..956b6f84 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -5,15 +5,21 @@ Bug reports, feature suggestions and other contributions are greatly appreciated! pysat and pysatMadrigal are community-driven projects that welcome both feedback and contributions. +Come join us on Slack! An invitation to the pysat workspace is available +in the 'About' section of the +[pysat GitHub Repository.](https://github.com/pysat/pysat) Development meetings +are generally held fortnightly. + Short version ------------- * Submit bug reports, feature requests, and questions at -`GitHub Issues `_ +[GitHub](https://github.com/pysat/pysatMadrigal/issues) + * Make pull requests to the ``develop`` branch -More about Issues ------------------ +Issues +------ Bug reports, questions, and feature requests should all be made as GitHub Issues. Templates are provided for each type of issue, to help you include @@ -24,73 +30,112 @@ Questions Not sure how something works? Ask away! The more information you provide, the easier the question will be to answer. You can also interact with the pysat -developers on our `slack channel `_. +developers on our [slack channel](https://pysat.slack.com). 
Bug reports ^^^^^^^^^^^ -When reporting a bug please include: +When [reporting a bug](https://github.com/pysat/pysatMadrigal/issues) please +include: * Your operating system name and version + * Any details about your local setup that might be helpful in troubleshooting + * Detailed steps to reproduce the bug -Feature requests -^^^^^^^^^^^^^^^^ +Feature requests and feedback +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The best way to send feedback is to file an +[issue](https://github.com/pysat/pysatMadrigal/issues). If you are proposing a new feature or a change in something that already exists: * Explain in detail how it would work. + * Keep the scope as narrow as possible, to make it easier to implement. + * Remember that this is a volunteer-driven project, and that code contributions are welcome :) -More about Development ----------------------- +Development +----------- To set up `pysatMadrigal` for local development: -1. Fork pysatMadrigal on - `GitHub `_. -2. Clone your fork locally:: +1. Fork [pysatMadrigal on GitHub](https://github.com/pysat/pysatMadrigal/fork). + +2. Clone your fork locally: + ``` git clone git@github.com:your_name_here/pysatMadrigal.git + ``` -3. Create a branch for local development:: +3. Create a branch for local development: + ``` git checkout -b name-of-your-bugfix-or-feature + ``` -4. Make your changes locally. Tests for new instruments are performed - automatically. Tests for custom functions should be added to the - appropriately named file in ``pysatMadrigal/tests``. For example, - Jicamarca methods containined in ``pysatMadrigal/instruments/methods/jro.py`` - should be named ``pysatMadrigal/tests/test_methods_jro.py``. If no test - file exists, then you should create one. This testing uses pytest, which - will run tests on any python file in the test directory that starts with - ``test``. Test classes must begin with ``Test``, and test methods must also - begin with ``test``. + Now you can make your changes locally. -5. 
When you're done making changes, run all the checks to ensure that nothing - is broken on your local system:: + Tests for new instruments are performed automatically. See discussion + [here](https://pysat.readthedocs.io/en/main/new_instrument.html#testing-support) + for more information on triggering these standard tests. - pytest -vs pysatMadrigal + Tests for custom functions should be added to the appropriately named file + in ``pysatMadrigal/tests``. For example, custom functions for the time + utilities are tested in ``pysat/tests/test_utils_time.py``. If no test file + exists, then you should create one. This testing uses pytest, which will run + tests on any python file in the test directory that starts with ``test``. + Classes must begin with ``Test``, and methods must begin with ``test`` as + well. + +4. When you're done making changes, run all the checks to ensure that nothing + is broken on your local system: + + ``` + pytest + + ``` + +5. You should also check for flake8 style compliance: + + ``` + flake8 . --count --select=D,E,F,H,W --show-source --statistics + ``` + + Note that pysat uses the `flake-docstrings` and `hacking` packages to ensure + standards in docstring formatting. 6. Update/add documentation (in ``docs``). Even if you don't think it's relevant, check to see if any existing examples have changed. 7. Add your name to the .zenodo.json file as an author -8. Commit your changes and push your branch to GitHub:: +8. Commit your changes: + ``` + git add . + git commit -m "AAA: Brief description of your changes" + ``` + Where AAA is a standard shorthand for the type of change (eg, BUG or DOC). + `pysat` follows the [numpy development workflow](https://numpy.org/doc/stable/dev/development_workflow.html), + see the discussion there for a full list of this shorthand notation. - git add . - git commit -m "Brief description of your changes" - git push origin name-of-your-bugfix-or-feature +9. 
Once you are happy with the local changes, push to GitHub: + ``` + git push origin name-of-your-bugfix-or-feature + ``` + Note that each push will trigger the Continuous Integration workflow. + +10. Submit a pull request through the GitHub website. Pull requests should be + made to the ``develop`` branch. Note that automated tests will be run on + GitHub Actions, but these must be initialized by a member of the pysat team. -9. Submit a pull request through the GitHub website. Pull requests should be - made to the ``develop`` branch. Pull Request Guidelines -^^^^^^^^^^^^^^^^^^^^^^^ +----------------------- If you need some code review or feedback while you're developing the code, just make a pull request. Pull requests should be made to the ``develop`` branch. @@ -99,17 +144,18 @@ For merging, you should: 1. Include an example for use 2. Add a note to ``CHANGELOG.md`` about the changes -3. Ensure that all checks passed (current checks include Travis-CI - and Coveralls) [1]_ +3. Ensure that all checks passed (current checks include GitHub Actions and + Coveralls) + +If you don't have all the necessary Python versions available locally or have +trouble building all the testing environments, you can rely on GitHub Actions to +run the tests for each change you add in the pull request. Because testing here +will delay tests by other developers, please ensure that the code passes all +tests on your local system first. -.. [1] If you don't have all the necessary Python versions available locally or - have trouble building all the testing environments, you can rely on - Travis to run the tests for each change you add in the pull request. - Because testing here will delay tests by other developers, please ensure - that the code passes all tests on your local system first. Project Style Guidelines -^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------ In general, pysat follows PEP8 and numpydoc guidelines. 
Pytest runs the unit and integration tests, flake8 checks for style, and sphinx-build performs @@ -130,15 +176,19 @@ These include: * `import numpy as np` * `import pandas as pds` * `import xarray as xr` +* When incrementing a timestamp, use `dt.timedelta` instead of `pds.DateOffset` + when possible to reduce program runtime * All classes should have `__repr__` and `__str__` functions * Docstrings use `Note` instead of `Notes` * Try to avoid creating a try/except statement where except passes -* Use setup and teardown in test classes +* Use setup_method (or setup_class) and teardown_method (or teardown_class) in + test classes * Use pytest parametrize in test classes when appropriate +* Use pysat testing utilities when appropriate * Provide testing class methods with informative failure statements and descriptive, one-line docstrings * Block and inline comments should use proper English grammar and punctuation with the exception of single sentences in a block, which may then omit the final period -* When casting is necessary, use `np.int64` and `np.float64` to ensure operating - system agnosticism +* When casting is necessary, use `np.int64` and `np.float64` to ensure operating + system agnosticism diff --git a/MANIFEST.in b/MANIFEST.in index d24537a0..6e2fb91c 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -5,7 +5,6 @@ recursive-include pysatMadrigal *.py include *.md include *.txt include LICENSE -include pysatMadrigal/version.txt prune pysatMadrigal/tests prune docs prune demo diff --git a/README.md b/README.md index 00b75996..3abb977b 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,9 @@ pysatMadrigal -# pysatMadrigal +# pysatMadrigal: pysat support for Madrigal data sets [![Documentation Status](https://readthedocs.org/projects/pysatmadrigal/badge/?version=latest)](https://pysatmadrigal.readthedocs.io/en/latest/?badge=latest) -[![Build 
Status](https://github.com/github/docs/actions/workflows/main.yml/badge.svg)](https://github.com/github/docs/actions/workflows/main.yml/badge.svg) +[![Pytest with Flake8](https://github.com/pysat/pysatMadrigal/actions/workflows/main.yml/badge.svg)](https://github.com/pysat/pysatMadrigal/actions/workflows/main.yml) [![Coverage Status](https://coveralls.io/repos/github/pysat/pysatMadrigal/badge.svg?branch=main)](https://coveralls.io/github/pysat/pysatMadrigal?branch=main) [![DOI](https://zenodo.org/badge/258384773.svg)](https://zenodo.org/badge/latestdoi/258384773) [![PyPI version](https://badge.fury.io/py/pysatMadrigal.svg)](https://badge.fury.io/py/pysatMadrigal) @@ -22,12 +22,12 @@ give some examples on how to use the routines. ## Prerequisites pysatMadrigal uses common Python modules, as well as modules developed by and -for the Space Physics community. This module officially supports Python 3.7+. +for the Space Physics community. This module officially supports Python 3.6+. | Common modules | Community modules | | -------------- | ----------------- | | h5py | madrigalWeb>=2.6 | -| numpy | pysat >= 3.0.3 | +| numpy | pysat >= 3.1.0 | | pandas | | | xarray | | @@ -47,7 +47,8 @@ a local install use the "--user" flag after "install". ``` cd pysatMadrigal/ -python setup.py install +python -m build . +pip install . ``` # Examples @@ -67,5 +68,8 @@ Afterward, pysat will identify them using the `platform` and `name` keywords. ``` pysat.utils.registry.register('pysatMadrigal.instruments.dmsp_ivm') -dst = pysat.Instrument('dmsp', 'ivm', tag='utd', inst_id='f15') +ivm = pysat.Instrument('dmsp', 'ivm', tag='utd', inst_id='f15') ``` + +The package also includes analysis tools. Detailed examples are in the +[documentation](https://pysatmadrigal.readthedocs.io/en/latest/?badge=latest). diff --git a/docs/acknowledgements.rst b/docs/acknowledgements.rst new file mode 100644 index 00000000..dd1f917a --- /dev/null +++ b/docs/acknowledgements.rst @@ -0,0 +1 @@ +..
mdinclude:: ../ACKNOWLEDGEMENTS.md diff --git a/docs/conf.py b/docs/conf.py index a1063ec6..64aff85f 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -19,7 +19,9 @@ import json import os +from pyproject_parser import PyProject import sys + sys.path.insert(0, os.path.abspath('..')) # -- General configuration ------------------------------------------------ @@ -55,20 +57,21 @@ master_doc = 'index' # General information about the project. +info = PyProject.load("../pyproject.toml") + project = 'pysatMadrigal' title = '{:s} Documentation'.format(project) zenodo = json.loads(open('../.zenodo.json').read()) author = ', '.join([creator['name'] for creator in zenodo['creators']]) description = ''.join(['Tools for accessing and analyzing data from the ', 'Madrigal database']) -copyright = ', '.join(['2021', author]) +category = 'Space Physics' +copyright = ', '.join(['2023', author]) # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. -doc_dir = os.path.abspath(os.path.dirname(__file__)) -with open(os.path.join(doc_dir, "..", project, "version.txt"), "r") as fin: - version = fin.read().strip() +version = info.project['version'].base_version release = '{:s}-beta'.format(version) # Include alpha/beta/rc tags. # The language for content autogenerated by Sphinx. Refer to documentation @@ -76,7 +79,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. 
@@ -143,7 +146,7 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [(master_doc, project, title, author, project, - description, 'Space Physics')] + description, category)] # -- Options for Epub output ---------------------------------------------- diff --git a/docs/examples.rst b/docs/examples.rst index 8dfafff5..f1f482e0 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -7,4 +7,5 @@ tools .. toctree:: examples/ex_init.rst examples/ex_gnss_tec.rst - examples/ex_jro_isr_beam.rst \ No newline at end of file + examples/ex_jro_isr_beam.rst + examples/ex_dmsp_methods.rst diff --git a/docs/index.rst b/docs/index.rst index 47875758..5439d5eb 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ to download, load, and support analysis for data sets available at the Madrigal data base as pysat.Instrument objects. .. toctree:: - :maxdepth: -1 + :maxdepth: 2 overview.rst installation.rst @@ -19,6 +19,11 @@ data base as pysat.Instrument objects. examples.rst develop_guide.rst history.rst + acknowledgements.rst + + +.. admonition:: DISTRIBUTION STATEMENT A: Approved for public release. + Distribution is unlimited. Indices and tables diff --git a/docs/installation.rst b/docs/installation.rst index 4ecc22a0..1c922eba 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -37,17 +37,18 @@ You may either install pysatMadrigal via pip or by cloning the git repository pip install pysatMadrigal -2. Clone the git repository and use the ``setup.py`` file to install +2. Clone the git repository and use the ``pyproject.toml`` file to install :: git clone https://github.com/pysat/pysatMadrigal.git - # Install on the system (root privileges required) - sudo python3 setup.py install + # Move into the pysatMadrigal directory. Then build the wheel + python -m build . 
+ - # Install at the user level - python3 setup.py install --user + # Install at the user or system level, depending on privledges + pip install . - # Install at the user level with the intent to develop locally - python3 setup.py develop --user + # Install with the intent to develop locally + pip install -e . diff --git a/docs/methods.rst b/docs/methods.rst index 8a216454..f8f01318 100644 --- a/docs/methods.rst +++ b/docs/methods.rst @@ -26,7 +26,8 @@ GNSS ---- Supports the Global Navigation Satellite System instruments by providing -reference and acknowledgement information. +reference and acknowledgement information, specialised load functions, and +supporting information for probing the line-of-sight (LoS) files. .. automodule:: pysatMadrigal.instruments.methods.gnss diff --git a/docs/supported_instruments.rst b/docs/supported_instruments.rst index ca45d415..75f52a56 100644 --- a/docs/supported_instruments.rst +++ b/docs/supported_instruments.rst @@ -11,6 +11,16 @@ Meter (IVM) Madrigal data. .. automodule:: pysatMadrigal.instruments.dmsp_ivm :members: +DMSP_SSJ +-------- + +Supports the Defense Meteorological Satellite Program (DMSP) Special Sensor J +(SSJ) Madrigal data. + + +.. automodule:: pysatMadrigal.instruments.dmsp_ssj + :members: + GNSS_TEC -------- @@ -25,14 +35,40 @@ JRO_ISR ------- The incoherent scatter radar (ISR) at the -`Jicamarca Radio Observatory `_ regularly -measures the velocity, density, and other ionospheric characteristics near the -magnetic equator over Peru. +`Jicamarca Radio Observatory `_ +regularly measures the velocity, density, and other ionospheric characteristics +near the magnetic equator over Peru. .. automodule:: pysatMadrigal.instruments.jro_isr :members: +Madrigal_Dst +------------ + +An instrument for the Madrigal Dst index data. This data set spans the years +of 1957 through a period close to today, with all data saved in a single file. 
+Because of this, you only need to download the data once and any desired time +period may be loaded (unless you require a time between your last download and +now). + +.. automodule:: pysatMadrigal.instruments.madrigal_dst + :members: + + +Madrigal_Geoind +--------------- + +An instrument for the Madrigal geomagnetic index data. This data set spans the +years of 1950 through a period close to today, with all data saved in a single +file. Because of this, you only need to download the data once and any desired +time period may be loaded (unless you require a time between your last download +and now). + +.. automodule:: pysatMadrigal.instruments.madrigal_geoind + :members: + + Madrigal_Pandas --------------- @@ -40,7 +76,7 @@ A general instrument for Madrigal time-series data. This :py:class:`pysat.Instrument` uses Madrigal instrument codes and kindats to support the use of any of the Madrigal time-series data sets. There are some further constraints in that the data set's Madrigal naming convention must be -parsable by pysat. Currently nine Madrigal instrument codes are supported by +parsable by pysat. Currently three Madrigal instrument codes are supported by this :py:class:`pysat.Instrument`. When possible, using a specific instrument module is recommended, since that instrument module will have additional support (e.g., cleaning methods, experiment acknowledgements, and references). @@ -49,4 +85,28 @@ support (e.g., cleaning methods, experiment acknowledgements, and references). :members: - +NGDC_AE +------- + +An instrument for the Geophysical indices from NGDC, which include AE, AL, AU, +and AO. The :py:attr:`name` is AE due to the Madrigal naming conventions. The +data set spans the years of 1978 through 1987, will all data saved in a single +file. Because of this, you only need to download the data once and any desired +time period may be loaded. + +.. 
automodule:: pysatMadrigal.instruments.ngdc_ae + :members: + + +OMNI2_IMF +--------- + +An instrument for the interplanetary magnetic field (IMF) data from OMNI 2. The +data starts in 1963 and the entire data set is contained in a single file. The +file is occasionally updated, and so obtaining the most recent data means that +all historic data must also be downloaded (or re-downloaded). OMNI data may +also be obtained more directly using +`pysatNASA `_. + +.. automodule:: pysatMadrigal.instruments.omni2_imf + :members: diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..956cd863 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,90 @@ +[build-system] +requires = ["setuptools >= 38.6", "pip >= 10"] +build-backend = "setuptools.build_meta" + +[project] +name = "pysatMadrigal" +version = "0.2.0" +description = 'Madrigal instrument support for the pysat ecosystem' +readme = "README.md" +requires-python = ">=3.6" +license = {file = "LICENSE"} +authors = [ + {name = "Angeline G. 
Burrell et al.", email = "pysat.developers@gmail.com"} +] +classifiers = [ + "Development Status :: 4 - Beta", + "Topic :: Scientific/Engineering :: Physics", + "Topic :: Scientific/Engineering :: Atmospheric Science", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: BSD License", + "Natural Language :: English", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Operating System :: POSIX :: Linux", + "Operating System :: MacOS :: MacOS X", + "Operating System :: Microsoft :: Windows" +] +keywords = [ + "pysat", + "ionosphere", + "Madrigal", + "CEDAR", + "thermosphere", + "GPS", + "GNSS", + "TEC", + "Jicamarca", + "DMSP", + "ISR", + "Incoherent scatter radar", +] +dependencies = [ + "h5py", + "madrigalWeb", + "numpy", + "packaging", + "pandas", + "pysat>=3.1.0", + "xarray"] + +[project.optional-dependencies] +test = [ + "coveralls", + "flake8", + "flake8-docstrings", + "hacking >= 1.0", + "pytest", + "pytest-cov", + "pytest-ordering" +] +doc = [ + "extras_require", + "ipython", + "m2r2", + "numpydoc", + "pyproject_parser", + "sphinx", + "sphinx_rtd_theme >= 1.2.2, < 2.0.0" +] + +[project.urls] +Documentation = "https://pysatmadrigal.readthedocs.io/en/latest/" +Source = "https://github.com/pysat/pysatMadrigal" + +[tool.coverage.report] + +[tool.pytest.ini_options] +addopts = "-x --cov=pysatMadrigal" +markers = [ + "all_inst", + "download", + "no_download", + "load_options", + "new_tests", + "first", + "second" +] diff --git a/pysatMadrigal/__init__.py b/pysatMadrigal/__init__.py index c1a628f6..3c72456a 100644 --- a/pysatMadrigal/__init__.py +++ b/pysatMadrigal/__init__.py @@ -1,2 +1,16 @@ +"""Core library for pysatMadrigal. + +This is a library of `pysat` instrument modules and methods designed to support +instruments archived at the Madrigal portal. 
+ +""" + +try: + from importlib import metadata +except ImportError: + import importlib_metadata as metadata + from pysatMadrigal import instruments # noqa F401 from pysatMadrigal import utils # noqa F401 + +__version__ = metadata.version('pysatMadrigal') diff --git a/pysatMadrigal/instruments/__init__.py b/pysatMadrigal/instruments/__init__.py index d44d0a81..814cc06b 100644 --- a/pysatMadrigal/instruments/__init__.py +++ b/pysatMadrigal/instruments/__init__.py @@ -6,12 +6,18 @@ """Import the Instrument sub-modules and methods.""" # Import Madrigal instruments from pysatMadrigal.instruments import dmsp_ivm +from pysatMadrigal.instruments import dmsp_ssj from pysatMadrigal.instruments import gnss_tec from pysatMadrigal.instruments import jro_isr +from pysatMadrigal.instruments import madrigal_dst +from pysatMadrigal.instruments import madrigal_geoind from pysatMadrigal.instruments import madrigal_pandas +from pysatMadrigal.instruments import ngdc_ae +from pysatMadrigal.instruments import omni2_imf # Import Madrigal methods from pysatMadrigal.instruments import methods # noqa F401 # Define variable name with all available instruments -__all__ = ['dmsp_ivm', 'gnss_tec', 'jro_isr', 'madrigal_pandas'] +__all__ = ['dmsp_ivm', 'dmsp_ssj', 'gnss_tec', 'jro_isr', 'madrigal_dst', + 'madrigal_geoind', 'madrigal_pandas', 'ngdc_ae', 'omni2_imf'] diff --git a/pysatMadrigal/instruments/dmsp_ivm.py b/pysatMadrigal/instruments/dmsp_ivm.py index 6ee57645..8f9bea13 100644 --- a/pysatMadrigal/instruments/dmsp_ivm.py +++ b/pysatMadrigal/instruments/dmsp_ivm.py @@ -1,9 +1,12 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. 
# ---------------------------------------------------------------------------- -# -*- coding: utf-8 -*- """Supports the Defense Meteorological Satellite Program (DMSP) IVM instruments. The Ion Velocity Meter (IVM) is comprised of the Retarding Potential Analyzer @@ -45,8 +48,6 @@ ---- Please provide name and email when downloading data with this routine. -Code development supported by NSF grant 1259508 - """ import datetime as dt @@ -104,6 +105,18 @@ 'f17': {tag: dt.datetime(2009, 1, 1) for tag in inst_ids['f17']}, 'f18': {tag: dt.datetime(2017, 12, 30) for tag in inst_ids['f18']}} +# TODO(#99): Remove when compliant with multi-day load tests +_new_tests = {'f18': {'': False}, 'f14': {'utd': False}} + +# Set the clean warnings for testing +_clean_warn = {inst_id: {tag: {clean_level: [('logger', 'WARN', + 'this level 1 data has no qual', + clean_level)] + if tag != 'utd' else [] + for clean_level in ['clean', 'dusty', 'dirty']} + for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} + # ---------------------------------------------------------------------------- # Instrument methods diff --git a/pysatMadrigal/instruments/dmsp_ssj.py b/pysatMadrigal/instruments/dmsp_ssj.py new file mode 100644 index 00000000..52287b74 --- /dev/null +++ b/pysatMadrigal/instruments/dmsp_ssj.py @@ -0,0 +1,278 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. +# ---------------------------------------------------------------------------- +"""Support the DMSP Special Sensor-J (SSJ) instrument and derived products. + +The Defense Meteorological Satellite Program (DMSP) SSJ measures precipitating +particles using spectrometery. 
The Auroral Boundary Index (ABI) is automatically +computed from this data set and marks the midnight equatorward boundary in each +hemisphere. + +Further questions can be addressed to: + Gordon Wilson + Air Force Research Lab + RVBXP + 3550 Aberdeen Avenue SE, Bldg 570 + Kirtland Air Force Base, NM 87117-5776 + Phone: 505-853-2027 + e-mail: gordon.wilson@kirtland.af.mil +or + Ernie Holeman (ernestholeman7408@comcast.net) + +Please send a copy of all publications that use the Auroral Boundary Index +(ABI) to Dr. Gordon Wilson at the above address. + + +Properties +---------- +platform + 'dmsp' +name + 'ssj' +tag + 'abi' +inst_id + 'f11' + +Example +------- +:: + + import pysat + dmsp = pysat.Instrument('dmsp', 'ssj', 'abi', clean_level='clean') + dmsp.download(dt.datetime(2017, 12, 30), dt.datetime(2017, 12, 31), + user='Firstname+Lastname', password='email@address.com') + dmsp.load(2017, 363) + +Note +---- +Please provide name and email when downloading data with this routine. + +""" + +import datetime as dt +import functools +import numpy as np +import pandas as pds +import warnings + +from pysat import logger +from pysat.utils.time import create_date_range + +from pysatMadrigal.instruments.methods import dmsp +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'dmsp' +name = 'ssj' +tags = {'abi': 'Midnight Auroral Boundary Index'} +inst_ids = {'': [tag for tag in tags.keys()]} + +pandas_format = True + +# Madrigal tags +madrigal_inst_code = 180 +madrigal_tag = {'': {'abi': '17110'}} + +# Local attributes +dmsp_fname = general.madrigal_file_format_str(madrigal_inst_code) +supported_tags = {inst_id: {tag: dmsp_fname for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +remote_tags = { + inst_id: {tag: supported_tags[inst_id][tag].format(file_type='hdf5') + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} + +# 
---------------------------------------------------------------------------- +# Instrument test attributes + +_test_dates = {'': {'abi': dt.datetime(1982, 12, 30)}} + +_clean_warn = {'': {'abi': {'dirty': [('warning', UserWarning, + 'No quality control level "dirty"', + 'dirty')]}}} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self): + """Initialize the Instrument object with values specific to DMSP IVM.""" + self.acknowledgements = ''.join([ + general.cedar_rules(), '\nThe Air Force Research Laboratory Auroral ', + 'Boundary Index (ABI) was provided by the United States Air Force ', + 'Research Laboratory, Kirtland Air Force Base, New Mexico.\n', + 'Please send a copy of all publications that use the ABI to Dr. ', + 'Gordon Wilson at: gordon.wilson@kirtland.af.mil\n', + 'Dr. Wilson considers this data set deprecated.']) + + logger.info(self.acknowledgements) + self.references = dmsp.references(self.name) + return + + +def clean(self): + """Clean DMSP IVM data cleaned to the specified level. + + Note + ---- + Supports 'clean' and 'dusty' + + 'clean' QC == 1 + 'dusty' QC <= 2 + 'dirty' and 'none' allow all QC flags (QC <= 3) + + When called directly by pysat, a clean level of 'none' causes pysat to skip + this routine. 
+ + Warnings + -------- + UserWarning + If the 'dirty' level is invoked (same as no cleaning) + + """ + if self.clean_level == 'clean': + iclean, = np.where(self['eqb_qc_fl'] <= 1) + elif self.clean_level == 'dusty': + iclean, = np.where(self['eqb_qc_fl'] <= 2) + else: + warnings.warn('No quality control level "dirty", using "none"') + iclean = None + + # Downselect data based upon cleaning conditions above + if iclean is not None: + self.data = self[iclean] + + return + + +# ---------------------------------------------------------------------------- +# Instrument functions +# +# Use the default Madrigal and pysat methods + +# Support listing the local files +list_files = functools.partial(general.list_files, + supported_tags=supported_tags, + file_cadence=pds.DateOffset(years=1)) + +# Set the list_remote_files routine +list_remote_files = functools.partial(general.list_remote_files, + inst_code=madrigal_inst_code, + kindats=madrigal_tag, + supported_tags=remote_tags) + + +# Set the load routine +def load(fnames, tag='', inst_id=''): + """Load DMSP SSJ4 data from Madrigal after accounting for date tags. + + Parameters + ---------- + fnames : array-like + Iterable of filename strings, full path, to data files to be loaded. + This input is nominally provided by pysat itself. + tag : str + Tag name used to identify particular data set to be loaded. This input + is nominally provided by pysat itself and is not used here. (default='') + inst_id : str + Instrument ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself, and is not used here. + (default='') + + Returns + ------- + data : pds.DataFrame or xr.Dataset + A pandas DataFrame or xarray Dataset holding the data from the file + meta : pysat.Meta + Metadata from the file, as well as default values from pysat + + Raises + ------ + ValueError + If data columns expected to create the time index are missing or if + coordinates are not supplied for all data columns. 
+ + Note + ---- + Currently HDF5 reading breaks if a different file type was used previously + + This routine is called as needed by pysat. It is not intended + for direct user interaction. + + """ + # The ABI data has a yearly cadance, extract the unique filenames to load + load_fnames = list() + file_dates = list() + for fname in fnames: + file_dates.append(dt.datetime.strptime(fname[-10:], '%Y-%m-%d')) + if fname[0:-11] not in load_fnames: + load_fnames.append(fname[0:-11]) + + # Load the data and metadata + data, meta = general.load(load_fnames, tag=tag, inst_id=inst_id) + + # If there is a date range, downselect here + if len(file_dates) > 0: + idx, = np.where((data.index >= min(file_dates)) + & (data.index < max(file_dates) + dt.timedelta(days=1))) + data = data.iloc[idx, :] + + return data, meta + + +def download(date_array, tag='', inst_id='', data_path=None, user=None, + password=None, file_type='hdf5'): + """Download DMSP SSJ4 data from Madrigal. + + Parameters + ---------- + date_array : array-like + list of datetimes to download data for. The sequence of dates need not + be contiguous. + tag : str + Tag identifier used for particular dataset. This input is provided by + pysat. (default='') + inst_id : str + Satellite ID string identifier used for particular dataset. This input + is provided by pysat. (default='') + data_path : str + Path to directory to download data to. (default=None) + user : str + User string input used for download. Provided by user and passed via + pysat. If an account is required for downloads this routine here must + error if user not supplied. (default=None) + password : str + Password for data download. (default=None) + file_type : str + File format for Madrigal data. (default='hdf5') + + Note + ---- + The user's names should be provided in field user. Ritu Karidhal should + be entered as Ritu+Karidhal + + The password field should be the user's email address. These parameters + are passed to Madrigal when downloading. 
+ + The affiliation field is set to pysat to enable tracking of pysat + downloads. + + """ + # Ensure the date range is correct + if date_array.freq not in ['AS-JAN', 'YS', 'AS']: + date_array = create_date_range( + dt.datetime(date_array[0].year, 1, 1), date_array[-1], freq='YS') + + # Download the remote files + general.download(date_array, inst_code=str(madrigal_inst_code), + kindat=madrigal_tag[inst_id][tag], data_path=data_path, + user=user, password=password, file_type=file_type) + return diff --git a/pysatMadrigal/instruments/gnss_tec.py b/pysatMadrigal/instruments/gnss_tec.py index fbeb3393..b9220969 100644 --- a/pysatMadrigal/instruments/gnss_tec.py +++ b/pysatMadrigal/instruments/gnss_tec.py @@ -1,9 +1,17 @@ -# -*- coding: utf-8 -*-. +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. +# ---------------------------------------------------------------------------- """Supports the MIT Haystack GNSS TEC data products. The Global Navigation Satellite System (GNSS) is used in conjunction with a world-wide receiver network to produce total electron content (TEC) data -products, including vertical and line-of-sight TEC. +products, including vertical and line-of-sight (or slant) TEC. Downloads data from the MIT Haystack Madrigal Database. 
@@ -14,7 +22,9 @@ name 'tec' tag - 'vtec', 'site' + 'vtec', 'site', 'los' +inst_id + '' (not used) Examples -------- @@ -24,22 +34,36 @@ import pysat import pysatMadrigal as pymad + # Get and load all vertical TEC for 19 Nov 2017 vtec = pysat.Instrument(inst_module=pymad.instruments.gnss_tec, tag='vtec') vtec.download(dt.datetime(2017, 11, 19), dt.datetime(2017, 11, 20), user='Firstname+Lastname', password='email@address.com') vtec.load(date=dt.datetime(2017, 11, 19)) + # Get and load the GLONASS slant TEC from the zzon site on 1 Jan 2023 + stec = pysat.Instrument(inst_module=pymad.instruments.gnss_tec, tag='los') + stec.download(start=dt.datetime(2023, 1, 1), user='Firstname+Lastname', + password='email@address.com') + stec.load(2023, 1, los_method='site', los_value='zzon', + gnss_network='glonass') Note ---- Please provide name and email when downloading data with this routine. +The line-of-sight data is too large to load an entire file at once. Data may be +loaded by individual receiver site for any number of days (recommended to load +one day) or a given time. To discover the available sites and times (exact times +are required for selection), you may use the +`pysatMadrigal.instruments.methods.gnss.get_los_times` and +`pysatMadrigal.instruments.methods.gnss.get_los_receiver_sites` functions. 
+ """ import datetime as dt import numpy as np -from pysat import logger +import pysat from pysatMadrigal.instruments.methods import general from pysatMadrigal.instruments.methods import gnss @@ -49,20 +73,22 @@ platform = 'gnss' name = 'tec' -tags = {'vtec': 'vertical TEC', 'site': 'Sites used in daily TEC data'} +tags = {'vtec': 'vertical TEC', 'site': 'Sites used in daily TEC data', + 'los': 'line-of-sight TEC'} inst_ids = {'': [tag for tag in tags.keys()]} pandas_format = False # Madrigal tags madrigal_inst_code = 8000 -madrigal_tag = {'': {'vtec': '3500', 'site': '3506'}} -# TODO(#12): `, 'los': '3505'}}` +madrigal_tag = {'': {'vtec': '3500', 'site': '3506', 'los': '3505'}} # Local attributes fname = general.madrigal_file_format_str(madrigal_inst_code, verbose=False).split("*") supported_tags = {ss: {'vtec': ''.join(['gps', fname[1], 'g', fname[2]]), + 'los': ''.join(['los_{{year:04d}}{{month:02d}}', + '{{day:02d}}', fname[2]]), 'site': ''.join(['site_{{year:04d}}{{month:02d}}', '{{day:02d}}', fname[2]])} for ss in inst_ids.keys()} @@ -73,7 +99,20 @@ # Instrument test attributes _test_dates = {'': {'vtec': dt.datetime(2017, 11, 19), - 'site': dt.datetime(2001, 1, 1)}} + 'site': dt.datetime(2001, 1, 1), + 'los': dt.datetime(2023, 1, 1)}} +_test_load_opt = {'': {'los': [{'los_method': 'site', 'los_value': 'zzon', + 'gnss_network': 'glonass'}, + {'los_method': 'time', + 'los_value': dt.datetime(2023, 1, 1)}]}} +_test_download_ci = {'': {'los': False}} # Download is too large to test +_clean_warn = {'': {tag: {clean_level: [('logger', 'INFO', + 'Data provided at a clean level' + if tag == 'site' else + 'further cleaning may be performed', + clean_level)] + for clean_level in ['clean', 'dusty', 'dirty']} + for tag in inst_ids['']}} # ---------------------------------------------------------------------------- # Instrument methods @@ -84,7 +123,7 @@ def init(self): self.acknowledgements = '\n'.join([gnss.acknowledgements(self.name), general.cedar_rules()]) 
self.references = gnss.references(self.name) - logger.info(self.acknowledgements) + pysat.logger.info(self.acknowledgements) return @@ -98,12 +137,14 @@ def clean(self): `clean_level` is None. """ - if self.tag in ["vtec", "site"]: - msg = "Data provided at a clean level" - if self.tag == "vtec": - msg = "".join([msg, ", further cleaning may be performed using ", - "the measurement error 'dtec'"]) - logger.info(msg) + msg = "Data provided at a clean level" + if self.tag == "vtec": + msg = "".join([msg, ", further cleaning may be performed using ", + "the measurement error 'dtec'"]) + elif self.tag == "los": + msg = "".join([msg, ", further cleaning may be performed using ", + "the measurement error 'dlos_tec'"]) + pysat.logger.info(msg) return @@ -211,6 +252,10 @@ def download(date_array, tag='', inst_id='', data_path=None, user=None, downloads. """ + if tag == 'los': + pysat.logger.warning( + 'LoS download is very large and succeptible to failure.') + general.download(date_array, inst_code=str(madrigal_inst_code), kindat=madrigal_tag[inst_id][tag], data_path=data_path, user=user, password=password, file_type=file_type, url=url) @@ -218,7 +263,8 @@ def download(date_array, tag='', inst_id='', data_path=None, user=None, return -def load(fnames, tag='', inst_id=''): +def load(fnames, tag='', inst_id='', los_method='site', los_value=None, + gnss_network='all'): """Load the GNSS TEC data. Parameters @@ -231,6 +277,14 @@ def load(fnames, tag='', inst_id=''): inst_id : str Instrument ID used to identify particular data set to be loaded. This input is nominally provided by pysat itself. (default='') + los_method : str + For 'los' tag only, load data for a unique GNSS receiver site ('site') + or at a unique time ('time') (default='site') + los_value : str, dt.datetime, or NoneType + For 'los' tag only, load data at this unique site or time (default=None) + gnss_nework : bool + For 'los' tag only, limit data by GNSS network if not 'all'. 
Currently + supports 'all', 'gps', and 'glonass' (default='all') Returns ------- @@ -239,47 +293,53 @@ def load(fnames, tag='', inst_id=''): meta : pysat.Meta Object containing metadata such as column names and units - """ - # Define the xarray coordinate dimensions (apart from time) - # Not needed for netCDF - xcoords = {'vtec': {('time', 'gdlat', 'glon', 'kindat', 'kinst'): - ['gdalt', 'tec', 'dtec'], - ('time', ): ['year', 'month', 'day', 'hour', 'min', - 'sec', 'ut1_unix', 'ut2_unix', 'recno']}, - 'site': {('time', 'gps_site'): ['gdlatr', 'gdlonr']}} + Raises + ------ + ValueError + If tag is 'los' and no valid 'los_value' is provided or unknown tag - # Load the specified data - data, meta = general.load(fnames, tag, inst_id, xarray_coords=xcoords[tag]) - - # Squeeze the kindat and kinst 'coordinates', but keep them as floats - squeeze_dims = np.array(['kindat', 'kinst']) - squeeze_mask = [sdim in data.coords for sdim in squeeze_dims] - if np.any(squeeze_mask): - data = data.squeeze(dim=squeeze_dims[squeeze_mask]) + Note + ---- + The line-of-sight data is too large to load an entire file at once. Data + may be loaded by individual receiver site for any number of days + (recommended to load one day) or a given time. To discover the available + sites and times (exact times are required for selection), you may use the + `pysatMadrigal.instruments.methods.gnss.get_los_times` and + `pysatMadrigal.instruments.methods.gnss.get_los_receiver_sites` functions. 
- # Fix the units for tec and dtec + """ + # Load the specified data if tag == 'vtec': - meta['tec'] = {meta.labels.units: 'TECU', meta.labels.min_val: 0.0, - meta.labels.max_val: np.nan} - meta['dtec'] = {meta.labels.units: 'TECU', meta.labels.min_val: 0.0, + data, meta, lat_keys, lon_keys = gnss.load_vtec(fnames) + elif tag == 'site': + data, meta, lat_keys, lon_keys = gnss.load_site(fnames) + elif tag == 'los': + if los_value is None: + raise ValueError('must specify a valid {:}'.format(los_method)) + + data, meta, lat_keys, lon_keys = gnss.load_los(fnames, los_method, + los_value, gnss_network) + + if len(data.dims.keys()) > 0: + # Squeeze the kindat and kinst 'coordinates', but keep them as floats + squeeze_dims = np.array(['kindat', 'kinst']) + squeeze_mask = [sdim in data.coords for sdim in squeeze_dims] + if np.any(squeeze_mask): + data = data.squeeze(dim=squeeze_dims[squeeze_mask]) + + # Get the maximum and minimum values for time, latitude, and longitude + meta['time'] = {meta.labels.notes: data['time'].values.dtype.__doc__, + meta.labels.min_val: np.nan, meta.labels.max_val: np.nan} - # Get the maximum and minimum values for time, latitude, longitude, - # and altitude - meta['time'] = {meta.labels.notes: data['time'].values.dtype.__doc__, - meta.labels.min_val: np.nan, meta.labels.max_val: np.nan} - if tag == 'vtec': - meta['gdalt'] = {meta.labels.min_val: 0.0, meta.labels.max_val: np.nan} - lat_key = 'gdlat' - lon_key = 'glon' - else: - lat_key = 'gdlatr' - lon_key = 'gdlonr' + for lat_key in lat_keys: + meta[lat_key] = {meta.labels.min_val: -90.0, + meta.labels.max_val: 90.0} - meta[lat_key] = {meta.labels.min_val: -90.0, meta.labels.max_val: 90.0} - min_lon = 0.0 if data[lon_key].values.min() >= 0.0 else -180.0 - meta[lon_key] = {meta.labels.min_val: min_lon, - meta.labels.max_val: min_lon + 360.0} + for lon_key in lon_keys: + min_lon = 0.0 if data[lon_key].values.min() >= 0.0 else -180.0 + meta[lon_key] = {meta.labels.min_val: min_lon, + 
meta.labels.max_val: min_lon + 360.0} return data, meta @@ -320,10 +380,10 @@ def list_remote_files(tag, inst_id, start=dt.datetime(1998, 10, 15), pysatMadrigal.instruments.methods.general.list_remote_files """ - if tag == 'site': - two_break = None - elif tag == 'vtec': + if tag == 'vtec': two_break = 99 + else: + two_break = None files = general.list_remote_files( tag, inst_id, supported_tags=remote_tags, diff --git a/pysatMadrigal/instruments/jro_isr.py b/pysatMadrigal/instruments/jro_isr.py index 47361c13..996a94b6 100644 --- a/pysatMadrigal/instruments/jro_isr.py +++ b/pysatMadrigal/instruments/jro_isr.py @@ -1,9 +1,12 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- -# -*- coding: utf-8 -*-. """Supports the Incoherent Scatter Radar at the Jicamarca Radio Observatory. 
Properties @@ -88,6 +91,26 @@ 'oblique_rand': dt.datetime(2000, 11, 9), 'oblique_long': dt.datetime(2010, 4, 12)}} +# TODO(#99): Remove when compliant with multi-day load tests +_new_tests = {'': {'drifts': False, 'drifts_ave': False, 'oblique_stan': False, + 'oblique_rand': False, 'oblique_long': False}} + +# Set the clean warnings for testing +_clean_warn = {'': {tag: {clean_level: [('logger', 'WARN', + 'this level 2 data has no quality ', + clean_level)] + for clean_level in ['clean', 'dusty']} + for tag in inst_ids['']}} + +for tag in inst_ids['']: + if tag.find('oblique') == 0: + _clean_warn[''][tag]['dirty'] = [('logger', 'WARN', + 'this level 2 data has no quality ', + 'dirty')] + for clean_level in ['clean', 'dusty', 'dirty']: + _clean_warn[''][tag][clean_level].append( + ('logger', 'INFO', 'The double pulse, coded ', clean_level)) + # ---------------------------------------------------------------------------- # Instrument methods diff --git a/pysatMadrigal/instruments/madrigal_dst.py b/pysatMadrigal/instruments/madrigal_dst.py new file mode 100644 index 00000000..1df06e18 --- /dev/null +++ b/pysatMadrigal/instruments/madrigal_dst.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. +# ---------------------------------------------------------------------------- +"""Supports access to Dst data archived at Madrigal. + +Properties +---------- +platform + 'madrigal' +name + 'dst' +tag + None supported +inst_id + None supported + +Note +---- +Please provide name (user) and email (password) when downloading data with this +routine. + +Warnings +-------- +The entire data set (1 Jan 1597 through a period close to the recent day) is +provided in a single file on Madrigal. 
+ +Examples +-------- +:: + + + import datetime as dt + import pysat + import pysatMadrigal as py_mad + + # Download Dst data from Madrigal + dst = pysat.Instrument(inst_module=py_mad.instruments.madrigal_dst) + dst.download(start=py_mad.instruments.madrigal_dst.madrigal_start, + user='Firstname+Lastname', password='email@address.com') + dst.load(date=dt.datetime(1981, 1, 1)) + +""" + +import datetime as dt +import functools + +import pysat + +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'madrigal' +name = 'dst' +tags = {'': ''} +inst_ids = {'': list(tags.keys())} +pandas_format = True + +# Madrigal tags and limits +madrigal_inst_code = 212 +madrigal_tag = {'': {'': "30006"}} +madrigal_start = dt.datetime(1957, 1, 1) +madrigal_end = dt.datetime.utcnow() + +# Local attributes +# +# Need a way to get the filename strings for a particular instrument unless +# wildcards start working +supported_tags = { + inst_id: {tag: general.madrigal_file_format_str(madrigal_inst_code, + verbose=False) + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} +remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument test attributes + +_test_dates = {inst_id: {tag: madrigal_start for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_test_download = {inst_id: {tag: True for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_clean_warn = {inst_id: {tag: {clvl: [('logger', 'WARN', + "No cleaning available", clvl)] + for clvl in ['clean', 'dusty', 'dirty']} + for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self): + """Initialize the 
Instrument object in support of Madrigal access. + + Note + ---- + Sets the `acknowledgements` and `references` attributes and logs + the Madrigal rules of the road; takes no additional parameters. + + """ + # Set the standard pysat attributes + self.acknowledgements = general.cedar_rules() + self.references = ''.join(['See reference list and publication at: ', + 'Sugiura M. and T. Kamei, http://', + 'wdc.kugi.kyoto-u.ac.jp/dstdir/dst2/', + 'onDstindex.html, last updated June 1991, ', + 'accessed Dec 2020']) + + # Remind the user of the Rules of the Road + pysat.logger.info(self.acknowledgements) + return + + +def clean(self): + """Raise warning that cleaning is not needed for the Madrigal Dst data. + + Note + ---- + Supports 'clean', 'dusty', 'dirty' in the sense that it prints + a message noting there is no cleaning. + 'None' is also supported as it signifies no cleaning. + + Routine is called by pysat, and not by the end user directly. + + """ + pysat.logger.warning("No cleaning available for the Madrigal Dst") + + return + + +# ---------------------------------------------------------------------------- # Instrument functions # # Use the default Madrigal and pysat methods file_cadence = madrigal_end - madrigal_start two_digit_year_break = 50 + +# Set the download routine +download = functools.partial(general.download, + inst_code=str(madrigal_inst_code), + kindat=madrigal_tag['']['']) + +# Set the list routine +list_files = functools.partial(general.list_files, + supported_tags=supported_tags, + file_cadence=file_cadence, + two_digit_year_break=two_digit_year_break) + +# Set list_remote_files routine +list_remote_files = functools.partial(general.list_remote_files, + supported_tags=remote_tags, + inst_code=madrigal_inst_code, + kindats=madrigal_tag, + two_digit_year_break=two_digit_year_break) + + +def load(fnames, tag='', inst_id=''): + """Load the Madrigal Dst data. + + Parameters + ---------- + fnames : list + List of filenames + tag : str + tag name used to identify particular data set to be loaded. 
+ This input is nominally provided by pysat itself. (default='') + inst_id : str + Instrument ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. (default='') + + Returns + -------- + data : pds.DataFrame + Object containing IMF data + meta : pysat.Meta + Object containing metadata such as column names and units + + Raises + ------ + ValueError + Unexpected time variable names + + """ + # Cycle through all the filenames, getting the desired start and stop times + fstart = None + fstop = None + for fname_date in fnames: + # Split the date from the filename + fname = fname_date[:-11] + fdate = dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d') + fstop = fdate + + if fstart is None: + fstart = fdate + + fstop += dt.timedelta(days=1) + + # There is only one file for this Instrument + data, meta = general.load([fname], tag=tag, inst_id=inst_id) + + # Select the data for the desired time period + data = data[fstart:fstop] + + return data, meta diff --git a/pysatMadrigal/instruments/madrigal_geoind.py b/pysatMadrigal/instruments/madrigal_geoind.py new file mode 100644 index 00000000..97e888e8 --- /dev/null +++ b/pysatMadrigal/instruments/madrigal_geoind.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. +# ---------------------------------------------------------------------------- +"""Supports access to F10.7, Kp, and Ap data archived at Madrigal. + +Properties +---------- +platform + 'madrigal' +name + 'geoind' +tag + None supported +inst_id + None supported + +Note +---- +Please provide name (user) and email (password) when downloading data with this +routine. + +Warnings +-------- +The entire data set (1 Jan 1950 through 31 Dec 1987) is provided in a single +file on Madrigal. 
+ +Examples +-------- +:: + + + import datetime as dt + import pysat + import pysatMadrigal as py_mad + + # Download geophysical indices from Madrigal + gind = pysat.Instrument(inst_module=py_mad.instruments.madrigal_geoind) + gind.download(start=py_mad.instruments.madrigal_geoind.madrigal_start, + user='Firstname+Lastname', password='email@address.com') + gind.load(date=dt.datetime(1981, 1, 1)) + +""" + +import datetime as dt +import functools + +import pysat + +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'madrigal' +name = 'geoind' +tags = {'': ''} +inst_ids = {'': list(tags.keys())} +pandas_format = True + +# Madrigal tags and limits +madrigal_inst_code = 210 +madrigal_tag = {'': {'': "30007"}} +madrigal_start = dt.datetime(1950, 1, 1) +madrigal_end = dt.datetime.utcnow() + +# Local attributes +# +# Need a way to get the filename strings for a particular instrument unless +# wildcards start working +supported_tags = { + inst_id: {tag: general.madrigal_file_format_str(madrigal_inst_code, + verbose=False) + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} +remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument test attributes + +_test_dates = {inst_id: {tag: madrigal_start for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_test_download = {inst_id: {tag: True for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_clean_warn = {inst_id: {tag: {clvl: [('logger', 'WARN', + "No cleaning available", clvl)] + for clvl in ['clean', 'dusty', 'dirty']} + for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self): + 
"""Initialize the Instrument object in support of Madrigal access.""" + # Set the standard pysat attributes + self.acknowledgements = general.cedar_rules() + self.references = "\n".join( + [''.join(["Covington, A.E. (1948), Solar noise observations on 10.7 ", + "centimeters Solar noise observations on 10.7 centimeters,", + " Proceedings of the IRE, 36(44), p 454-457."]), + ''.join(["J. Bartels, The technique of scaling indices K and Q of ", + "geomagnetic activity, Ann. Intern. Geophys. Year 4, ", + "215-226, 1957."]), + ''.join(["J. Bartels, The geomagnetic measures for the time-", + "variations of solar corpuscular radiation, described for ", + "use in correlation studies in other geophysical fields, ", + "Ann. Intern. Geophys. Year 4, 227-236, 1957."]), + ''.join(["P.N. Mayaud, Derivation, Meaning and Use of Geomagnetic ", + "Indices, Geophysical Monograph 22, Am. Geophys. Union, ", + "Washington D.C., 1980."]), + ''.join(["G.K. Rangarajan, Indices of magnetic activity, in ", + "Geomagnetism, edited by I.A. Jacobs, Academic, San Diego,", + " 1989."]), + ''.join(["M. Menvielle and A. Berthelier, The K-derived planetary ", + "indices: description and availability, Rev. Geophys. 29, ", + "3, 415-432, 1991."])]) + + # Remind the user of the Rules of the Road + pysat.logger.info(self.acknowledgements) + return + + +def clean(self): + """Raise warning that cleaning is not needed for the OMNI2 data. + + Note + ---- + Supports 'clean', 'dusty', 'dirty' in the sense that it prints + a message noting there is no cleaning. + 'None' is also supported as it signifies no cleaning. + + Routine is called by pysat, and not by the end user directly. 
+ + """ + pysat.logger.warning( + "No cleaning available for the Madrigal geophysical indices") + + return + + +# ---------------------------------------------------------------------------- +# Instrument functions +# +# Use the default Madrigal and pysat methods +file_cadence = madrigal_end - madrigal_start +two_digit_year_break = 50 + +# Set the download routine +download = functools.partial(general.download, + inst_code=str(madrigal_inst_code), + kindat=madrigal_tag['']['']) + +# Set the list routine +list_files = functools.partial(general.list_files, + supported_tags=supported_tags, + file_cadence=file_cadence, + two_digit_year_break=two_digit_year_break) + +# Set list_remote_files routine +list_remote_files = functools.partial(general.list_remote_files, + supported_tags=remote_tags, + inst_code=madrigal_inst_code, + kindats=madrigal_tag, + two_digit_year_break=two_digit_year_break) + + +def load(fnames, tag='', inst_id=''): + """Load the Madrigal geoindex data. + + Parameters + ----------- + fnames : list + List of filenames + tag : str + tag name used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. (default='') + inst_id : str + Instrument ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. 
(default='') + + Returns + -------- + data : pds.DataFrame + Object containing geoindex data + meta : pysat.Meta + Object containing metadata such as column names and units + + Raises + ------ + ValueError + Unexpected time variable names + + """ + # Cycle through all the filenames, getting the desired start and stop times + fstart = None + fstop = None + for fname_date in fnames: + # Split the date from the filename + fname = fname_date[:-11] + fdate = dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d') + fstop = fdate + + if fstart is None: + fstart = fdate + + fstop += dt.timedelta(days=1) + + # There is only one file for this Instrument + data, meta = general.load([fname], tag=tag, inst_id=inst_id) + + # Select the data for the desired time period + data = data[fstart:fstop] + + return data, meta diff --git a/pysatMadrigal/instruments/madrigal_pandas.py b/pysatMadrigal/instruments/madrigal_pandas.py index 33df67c3..574f09f3 100644 --- a/pysatMadrigal/instruments/madrigal_pandas.py +++ b/pysatMadrigal/instruments/madrigal_pandas.py @@ -75,7 +75,9 @@ name = 'pandas' tags = dict() pandas_format = True -excluded_tags = ['8105'] # Pandas-style data that requires special support + +# Pandas-style data that requires special support +excluded_tags = ['120', '180', '210', '211', '212', '8105'] # Assign only tags with pysat-compatible file format strings pandas_codes = general.known_madrigal_inst_codes(pandas_format=True) @@ -98,17 +100,21 @@ # ---------------------------------------------------------------------------- # Instrument test attributes - -# Need to sort out test day setting for unit testing, maybe through a remote -# function -tag_dates = {'120': dt.datetime(1963, 11, 27), '170': dt.datetime(1998, 7, 1), - '180': dt.datetime(2000, 1, 1), '210': dt.datetime(1950, 1, 1), - '211': dt.datetime(1978, 1, 1), '212': dt.datetime(1957, 1, 1), - '7800': dt.datetime(2009, 11, 10)} +tag_dates = {'170': dt.datetime(1998, 7, 1), '7800': dt.datetime(2009, 11, 10)} 
_test_dates = {'': {tag: tag_dates[tag] if tag in tag_dates.keys() else tag_dates['7800'] for tag in tags.keys()}} _test_download = {'': {tag: True for tag in tags.keys()}} +# TODO(#99): Remove when compliant with multi-day load tests +_new_tests = {'': {'7800': False}} + +# Set the clean warnings for testing +_clean_warn = {'': {tag: {clean_level: [('logger', 'WARN', + "can't support instrument-specific", + clean_level)] + for clean_level in ['clean', 'dusty', 'dirty']} + for tag in inst_ids['']}} + # ---------------------------------------------------------------------------- # Instrument methods @@ -235,27 +241,25 @@ def list_files(tag, inst_id, data_path, kindat='', format_str=None, return out -def download(date_array, tag='', inst_id='', data_path=None, user=None, - password=None, file_type='hdf5', kindat=''): +def download(date_array, tag, inst_id, data_path, user=None, password=None, + file_type='hdf5', kindat=''): """Download data from Madrigal. Parameters ---------- date_array : array-like - list of datetimes to download data for. The sequence of dates need not + List of datetimes to download data for. The sequence of dates need not be contiguous. tag : str - Madrigal Instrument code cast as a string. (default='') + Madrigal Instrument code cast as a string. inst_id : str - Satellite ID string identifier used for particular dataset. (default='') + Instrument ID used for particular dataset. data_path : str - Path to directory to download data to. (default=None) - user : str - User string input used for download. Provided by user and passed via - pysat. If an account is required for dowloads this routine here must - error if user not supplied. (default=None) - password : str - Password for data download. (default=None) + Path to directory to download data to. + user : str or NoneType + User name, raises an error if user not supplied. (default=None) + password : str or NoneType + User email, raises an error if not supplied. 
(default=None) file_type : str File format for Madrigal data. (default='hdf5') kindat : str @@ -263,8 +267,13 @@ def download(date_array, tag='', inst_id='', data_path=None, user=None, instrument. May be a single value, blank (all), or a comma-delimited list. (default='') - Notes - ----- + Raises + ------ + ValueError + If user or password are not supplied + + Note + ---- The user's names should be provided in field user. Maria Goeppert Mayer should be entered as "Maria Goeppert Mayer" diff --git a/pysatMadrigal/instruments/methods/dmsp.py b/pysatMadrigal/instruments/methods/dmsp.py index 37bab141..0eb4c56e 100644 --- a/pysatMadrigal/instruments/methods/dmsp.py +++ b/pysatMadrigal/instruments/methods/dmsp.py @@ -1,9 +1,12 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- -# -*- coding: utf-8 -*- """Methods supporting the Defense Meteorological Satellite Program (DMSP).""" import numpy as np @@ -31,7 +34,20 @@ def references(name): 'SSIES-3) on Spacecraft of the Defense', 'Meteorological Satellite Program (Air Force', 'Phillips Laboratory, Hanscom AFB, MA, 1994),', - 'Vol. 1, p. 25.'))} + 'Vol. 1, p. 25.')), + 'ssj': ''.join(['Gussenhoven, M. S., D. A. Hardy and W. J. Burke, ', + 'DMSP/F2 electron observations of equatorward ', + 'auroral boundaries and their relationship to ', + 'magnetospheric electric fields, J. Geophys. Res.,', + ' 86, 768-778, 1981.\nGussenhoven, M. S., D. A. ', + 'Hardy and N. Heinemann, Systematics of the ', + 'equatorward diffuse auroral boundary, J. Geophys.', + ' Res., 88, 5692-5708, 1983.\nHardy, D. A., E. ', + 'G. Holeman, W. J. Burke, L. C. Gentile and K. H. 
', + 'Bounar (2008), Probability distributions of ', + 'electron precipitation at high magnetic latitudes', + ', Journal of Geophysical Research, Volume 113, ', + 'Issue A6, doi:10.1029/2007JA012746.'])} return refs[name] diff --git a/pysatMadrigal/instruments/methods/general.py b/pysatMadrigal/instruments/methods/general.py index 2751b260..4ca23bda 100644 --- a/pysatMadrigal/instruments/methods/general.py +++ b/pysatMadrigal/instruments/methods/general.py @@ -1,9 +1,12 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- -# -*- coding: utf-8 -*-. """General routines for integrating CEDAR Madrigal instruments into pysat.""" import datetime as dt @@ -19,7 +22,6 @@ from madrigalWeb import madrigalWeb -logger = pysat.logger file_types = {'hdf5': 'hdf5', 'netCDF4': 'netCDF4', 'simple': 'simple.gz'} @@ -481,13 +483,239 @@ def madrigal_file_format_str(inst_code, strict=False, verbose=True): if strict: raise ValueError(msg) elif verbose: - logger.warning(msg) + pysat.logger.warning(msg) fstr += "{file_type}" return fstr +def sort_file_formats(fnames): + """Separate filenames by file format type. + + Parameters + ---------- + fnames : array-like + Iterable of filename strings, full path, to data files to be loaded. + This input is nominally provided by pysat itself. + + Returns + ------- + load_file_types : dict + A dictionary with file types as keys and a list of filenames for + each file type. 
+ + """ + # Sort the files by file format type + load_file_types = {ftype: [] for ftype in file_types.keys()} + for fname in fnames: + for ftype in file_types.keys(): + if fname.find(ftype) > 0: + load_file_types[ftype].append(fname) + break + + if fname not in load_file_types[ftype]: + # Raise a logger warning if a file with an unknown extension + # is encountered + pysat.logger.warning( + 'file with unknown file type: {:}'.format(fname)) + + return load_file_types + + +def update_meta_with_hdf5(file_ptr, meta): + """Get meta data from a Madrigal HDF5 file. + + Parameters + ---------- + file_ptr : h5py._hl.files.File + Pointer to an open HDF5 file + meta : pysat.Meta + Existing Meta class to be updated + + Returns + ------- + file_labels : list + List of metadata available + + """ + # Get the Madrigal metadata + file_meta = file_ptr['Metadata']['Data Parameters'] + file_labels = list() + + # Load available info into pysat.Meta if not already present + for item in file_meta: + name_string = item[0].decode('UTF-8') + unit_string = item[3].decode('UTF-8') + desc_string = item[1].decode('UTF-8') + file_labels.append(name_string) + + # Only update metadata if necessary + if name_string.lower() not in meta: + meta[name_string.lower()] = {meta.labels.name: name_string, + meta.labels.units: unit_string, + meta.labels.desc: desc_string} + + # Add additional metadata notes. Custom attributes attached to meta are + # attached to the MetaHeader object later + for key in file_ptr['Metadata']: + if key != 'Data Parameters': + setattr(meta, key.replace(' ', '_'), file_ptr['Metadata'][key][:]) + + return file_labels + + +def build_madrigal_datetime_index(mad_data): + """Create a datetime index using standard Madrigal variables. 
+ + Parameters + ---------- + mad_data : pds.DataFrame + Madrigal data, expects time variables 'year', 'month', 'day', 'hour', + 'min', and 'sec' + + Returns + ------- + data_time : + Datetime index for use by pysat + + Raises + ------ + ValueError + If expected time variables are missing + + """ + # Set the standard time keys + time_keys = np.array(['year', 'month', 'day', 'hour', 'min', 'sec']) + + # Construct datetime index from times + if not np.all([key in mad_data.columns for key in time_keys]): + time_keys = [key for key in time_keys if key not in mad_data.columns] + raise ValueError(' '.join(["unable to construct time index, missing ", + repr(time_keys)])) + + # Get the UT seconds of day and build the datetime index + uts = 3600.0 * mad_data.loc[:, 'hour'] + 60.0 * mad_data.loc[:, 'min'] \ + + mad_data.loc[:, 'sec'] + data_time = pysat.utils.time.create_datetime_index( + year=mad_data.loc[:, 'year'], month=mad_data.loc[:, 'month'], + day=mad_data.loc[:, 'day'], uts=uts) + + return data_time + + +def convert_pandas_to_xarray(xarray_coords, data, time_ind): + """Convert Madrigal HDF5/simple data from pandas to xarray. + + Parameters + ---------- + xarray_coords : list or NoneType + List of keywords to use as coordinates if xarray output is desired + instead of a Pandas DataFrame. Can build an xarray Dataset + that have different coordinate dimensions by providing a dict + inside the list instead of coordinate variable name strings. Each dict + will have a tuple of coordinates as the key and a list of variable + strings as the value. Empty list if None. For example, + xarray_coords=[{('time',): ['year', 'doy'], + ('time', 'gdalt'): ['data1', 'data2']}]. (default=None) + data : pds.DataFrame + Data to be converted into the xarray format + time_ind : pds.DatetimeIndex or NoneType + Time index for the data or None for no time index + + Returns + ------- + data : xr.Dataset + Data in the dataset format. 
+ + """ + # If a list was provided, recast as a dict and grab the data columns + if not isinstance(xarray_coords, dict): + xarray_coords = {tuple(xarray_coords): [col for col in data.columns + if col not in xarray_coords]} + + # Determine the order in which the keys should be processed: + # Greatest to least number of dimensions + len_dict = {len(xcoords): xcoords for xcoords in xarray_coords.keys()} + coord_order = [len_dict[xkey] for xkey in sorted( + [lkey for lkey in len_dict.keys()], reverse=True)] + + # Append time to the data frame, if provided + if time_ind is not None: + data = data.assign(time=pds.Series(time_ind, index=data.index)) + + # Cycle through each of the coordinate dimensions + xdatasets = list() + for xcoords in coord_order: + if data.empty: + break + elif not np.all([xkey.lower() in data.columns for xkey in xcoords]): + raise ValueError(''.join(['unknown coordinate key in [', + repr(xcoords), '], use only: ', + repr(data.columns)])) + elif not np.all([xkey.lower() in data.columns + for xkey in xarray_coords[xcoords]]): + good_ind = [i for i, xkey in enumerate(xarray_coords[xcoords]) + if xkey.lower() in data.columns] + + if len(good_ind) == 0: + raise ValueError('All data variables {:} are unknown.'.format( + xarray_coords[xcoords])) + elif len(good_ind) < len(xarray_coords[xcoords]): + # Remove the coordinates that aren't present. + temp = np.array(xarray_coords[xcoords])[good_ind] + + # Warn user, some of this may be due to a file format change. + bad_ind = [i for i in range(len(xarray_coords[xcoords])) + if i not in good_ind] + pysat.logger.warning( + 'unknown data variable(s) {:}, using only: {:}'.format( + np.array(xarray_coords[xcoords])[bad_ind], temp)) + + # Assign good data as a list. 
+ xarray_coords[xcoords] = list(temp) + + # Select the desired data values + sel_data = data[list(xcoords) + xarray_coords[xcoords]] + + # Remove duplicates before indexing, to ensure data with the same values + # at different locations are kept + sel_data = sel_data.drop_duplicates() + + # Set the indices + sel_data = sel_data.set_index(list(xcoords)) + + # Recast as an xarray + xdatasets.append(sel_data.to_xarray()) + + # Get the necessary information to test the data + lcols = data.columns + len_data = len(lcols) + + # Merge all of the datasets + if len(xdatasets) > 0: + data = xr.merge(xdatasets) + test_variables = [xkey for xkey in data.variables.keys()] + ltest = len(test_variables) + + # Test to see that all data was retrieved + if ltest != len_data: + if ltest < len_data: + estr = 'missing: {:}'.format(' '.join([ + dvar for dvar in lcols if dvar not in test_variables])) + else: + estr = 'have extra: {:}'.format(' '.join([ + tvar for tvar in test_variables if tvar not in lcols])) + raise ValueError(''.join([ + 'coordinates not supplied for all data columns', + ': {:d} != {:d}; '.format(ltest, len_data), estr])) + else: + # Return an empty object + data = xr.Dataset() + + return data + + def load(fnames, tag='', inst_id='', xarray_coords=None): """Load data from Madrigal into Pandas or XArray. @@ -534,13 +762,8 @@ def load(fnames, tag='', inst_id='', xarray_coords=None): for direct user interaction. 
""" - # Test the file formats - load_file_types = {ftype: [] for ftype in file_types.keys()} - for fname in fnames: - for ftype in file_types.keys(): - if fname.find(ftype) > 0: - load_file_types[ftype].append(fname) - break + # Test and sort the file formats + load_file_types = sort_file_formats(fnames) # Initialize xarray coordinates, if needed if xarray_coords is None: @@ -637,30 +860,10 @@ def load(fnames, tag='', inst_id='', xarray_coords=None): # Open the specified file and get the data and metadata filed = h5py.File(fname, 'r') file_data = filed['Data']['Table Layout'] - file_meta = filed['Metadata']['Data Parameters'] - # Load available info into pysat.Meta if this is the first file + new_labels = update_meta_with_hdf5(filed, meta) if len(labels) == 0: - for item in file_meta: - name_string = item[0].decode('UTF-8') - unit_string = item[3].decode('UTF-8') - desc_string = item[1].decode('UTF-8') - labels.append(name_string) - - # Only update metadata if necessary - if name_string.lower() not in meta: - meta[name_string.lower()] = { - meta.labels.name: name_string, - meta.labels.units: unit_string, - meta.labels.desc: desc_string} - - # Add additional metadata notes. 
Custom attributes attached to - # meta are attached to corresponding Instrument object when - # pysat receives data and meta from this routine - for key in filed['Metadata']: - if key != 'Data Parameters': - setattr(meta, key.replace(' ', '_'), - filed['Metadata'][key][:]) + labels = new_labels # Load data into frame, with labels from metadata ldata = pds.DataFrame.from_records(file_data, columns=labels) @@ -671,109 +874,11 @@ def load(fnames, tag='', inst_id='', xarray_coords=None): # Extended processing is the same for simple and HDF5 files # # Construct datetime index from times - time_keys = np.array(['year', 'month', 'day', 'hour', 'min', 'sec']) - if not np.all([key in ldata.columns for key in time_keys]): - time_keys = [key for key in time_keys - if key not in ldata.columns] - raise ValueError(' '.join(["unable to construct time index, ", - "missing {:}".format(time_keys)])) - - uts = 3600.0 * ldata.loc[:, 'hour'] + 60.0 * ldata.loc[:, 'min'] \ - + ldata.loc[:, 'sec'] - time = pysat.utils.time.create_datetime_index( - year=ldata.loc[:, 'year'], month=ldata.loc[:, 'month'], - day=ldata.loc[:, 'day'], uts=uts) + time = build_madrigal_datetime_index(ldata) # Declare index or recast as xarray if coord_len > 0: - # If a list was provided, recast as a dict and grab the data - # columns - if not isinstance(xarray_coords, dict): - xarray_coords = {tuple(xarray_coords): - [col for col in ldata.columns - if col not in xarray_coords]} - - # Determine the order in which the keys should be processed: - # Greatest to least number of dimensions - len_dict = {len(xcoords): xcoords - for xcoords in xarray_coords.keys()} - coord_order = [len_dict[xkey] for xkey in sorted( - [lkey for lkey in len_dict.keys()], reverse=True)] - - # Append time to the data frame - ldata = ldata.assign(time=pds.Series(time, index=ldata.index)) - - # Cycle through each of the coordinate dimensions - xdatasets = list() - for xcoords in coord_order: - if not np.all([xkey.lower() in ldata.columns - 
for xkey in xcoords]): - raise ValueError(''.join(['unknown coordinate key ', - 'in [{:}'.format(xcoords), - '], use only: {:}'.format( - ldata.columns)])) - if not np.all([xkey.lower() in ldata.columns - for xkey in xarray_coords[xcoords]]): - good_ind = [ - i for i, xkey in enumerate(xarray_coords[xcoords]) - if xkey.lower() in ldata.columns] - - if len(good_ind) == 0: - raise ValueError(''.join([ - 'All data variables {:} are unknown.'.format( - xarray_coords[xcoords])])) - elif len(good_ind) < len(xarray_coords[xcoords]): - # Remove the coordinates that aren't present. - temp = np.array(xarray_coords[xcoords])[good_ind] - - # Warn user, some of this may be due to a file - # format update or change. - bad_ind = [i for i in - range(len(xarray_coords[xcoords])) - if i not in good_ind] - logger.warning(''.join([ - 'unknown data variable(s) {:}, '.format( - np.array(xarray_coords[xcoords])[bad_ind]), - 'using only: {:}'.format(temp)])) - - # Assign good data as a list. - xarray_coords[xcoords] = list(temp) - - # Select the desired data values - sel_data = ldata[list(xcoords) + xarray_coords[xcoords]] - - # Remove duplicates before indexing, to ensure data with - # the same values at different locations are kept - sel_data = sel_data.drop_duplicates() - - # Set the indices - sel_data = sel_data.set_index(list(xcoords)) - - # Recast as an xarray - xdatasets.append(sel_data.to_xarray()) - - # Get the necessary information to test the data - lcols = ldata.columns - len_data = len(lcols) - - # Merge all of the datasets - ldata = xr.merge(xdatasets) - test_variables = [xkey for xkey in ldata.variables.keys()] - ltest = len(test_variables) - - # Test to see that all data was retrieved - if ltest != len_data: - if ltest < len_data: - estr = 'missing: {:}'.format( - ' '.join([dvar for dvar in lcols - if dvar not in test_variables])) - else: - estr = 'have extra: {:}'.format( - ' '.join([tvar for tvar in test_variables - if tvar not in lcols])) - raise ValueError(''.join([ - 
'coordinates not supplied for all data columns', - ': {:d} != {:d}; '.format(ltest, len_data), estr])) + ldata = convert_pandas_to_xarray(xarray_coords, ldata, time) else: # Set the index to time ldata.index = time @@ -781,10 +886,10 @@ def load(fnames, tag='', inst_id='', xarray_coords=None): # Raise a logging warning if there are duplicate times. This # means the data should be stored as an xarray Dataset if np.any(time.duplicated()): - logger.warning(''.join(["duplicated time indices, ", - "consider specifing additional", - " coordinates and storing the ", - "data as an xarray Dataset"])) + pysat.logger.warning(''.join([ + "duplicated time indices, consider specifing ", + "additional coordinates and storing the data as an ", + "xarray Dataset"])) # Compile a list of the data objects fdata.append(ldata) @@ -819,7 +924,7 @@ def load(fnames, tag='', inst_id='', xarray_coords=None): def download(date_array, inst_code=None, kindat=None, data_path=None, user=None, password=None, url="http://cedar.openmadrigal.org", - file_type='hdf5'): + file_type='hdf5', **kwargs): """Download data from Madrigal. Parameters @@ -847,6 +952,9 @@ def download(date_array, inst_code=None, kindat=None, data_path=None, File format for Madrigal data. Load routines currently only accepts 'hdf5' and 'netCDF4', but any of the Madrigal options may be used here. (default='hdf5') + **kwargs : dict + Additional kwarg catch, allows general use when tag/inst_id are not + needed for a given instrument. 
Raises ------ @@ -878,12 +986,12 @@ def download(date_array, inst_code=None, kindat=None, data_path=None, start = date_array.min() stop = date_array.max() if start == stop: - stop += dt.timedelta(days=1) + stop = date_array.shift().max() # Initialize the connection to Madrigal - logger.info('Connecting to Madrigal') + pysat.logger.info('Connecting to Madrigal') web_data = madrigalWeb.MadrigalData(url) - logger.info('Connection established.') + pysat.logger.info('Connection established.') files = get_remote_filenames(inst_code=inst_code, kindat=kindat, user=user, password=password, @@ -902,12 +1010,12 @@ def download(date_array, inst_code=None, kindat=None, data_path=None, if not os.path.isfile(local_file): fstr = ''.join(('Downloading data for ', local_file)) - logger.info(fstr) + pysat.logger.info(fstr) web_data.downloadFile(mad_file.name, local_file, user, password, "pysat", format=file_type) else: estr = ''.join((local_file, ' already exists. Skipping.')) - logger.info(estr) + pysat.logger.info(estr) return @@ -1015,7 +1123,7 @@ def get_remote_filenames(inst_code=None, kindat='', user=None, password=None, files = list() istr = "Found {:d} Madrigal experiments between {:s} and {:s}".format( len(exp_list), start.strftime('%d %B %Y'), stop.strftime('%d %B %Y')) - logger.info(istr) + pysat.logger.info(istr) for exp in exp_list: if good_exp(exp, date_array=date_array): file_list = web_data.getExperimentFiles(exp.id) @@ -1157,13 +1265,6 @@ def list_remote_files(tag, inst_id, inst_code=None, kindats=None, user=None, format_str = supported_tags[inst_id][tag] kindat = kindats[inst_id][tag] - # TODO(#1022, pysat) Note default of `pysat.Instrument.remote_file_list` - # for start and stop is None. Setting defaults needed for Madrigal. 
- if start is None: - start = dt.datetime(1900, 1, 1) - if stop is None: - stop = dt.datetime.utcnow() - # Retrieve remote file experiment list files = get_remote_filenames(inst_code=inst_code, kindat=kindat, user=user, password=password, url=url, start=start, @@ -1179,7 +1280,7 @@ def list_remote_files(tag, inst_id, inst_code=None, kindats=None, user=None, format_str = format_str.replace('.hdf5', '.h5') # Parse these filenames to grab out the ones we want - logger.info("Parsing filenames") + pysat.logger.info("Parsing filenames") if format_str.find('*') < 0: stored = pysat.utils.files.parse_fixed_width_filenames(filenames, format_str) @@ -1188,7 +1289,7 @@ def list_remote_files(tag, inst_id, inst_code=None, kindats=None, user=None, format_str, '.') # Process the parsed filenames and return a properly formatted Series - logger.info("Processing filenames") + pysat.logger.info("Processing filenames") return pysat.utils.files.process_parsed_filenames(stored, two_digit_year_break) diff --git a/pysatMadrigal/instruments/methods/gnss.py b/pysatMadrigal/instruments/methods/gnss.py index 2aa3df23..0283a182 100644 --- a/pysatMadrigal/instruments/methods/gnss.py +++ b/pysatMadrigal/instruments/methods/gnss.py @@ -1,11 +1,23 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- -# -*- coding: utf-8 -*- """Methods supporting the Global Navigation Satellite System platform.""" +import datetime as dt +import h5py +import numpy as np +import pandas as pds + +import pysat + +from pysatMadrigal.instruments.methods import general + def acknowledgements(name): """Provide the acknowledgements for different GNSS instruments. 
@@ -63,6 +75,253 @@ def references(name):
         Suggested Instrument reference(s)
 
     """
-    refs = {'tec': "Rideout and Coster (2006) doi:10.1007/s10291-006-0029-5"}
+    refs = {'tec': "\n".join([
+        "Rideout and Coster (2006) doi:10.1007/s10291-006-0029-5",
+        "Vierinen et al., (2016) doi:10.5194/amt-9-1303-2016"])}
 
     return refs[name]
+
+
+def load_vtec(fnames):
+    """Load the GNSS vertical TEC data.
+
+    Parameters
+    ----------
+    fnames : list
+        List of filenames
+
+    Returns
+    -------
+    data : xarray.Dataset
+        Object containing satellite data
+    meta : pysat.Meta
+        Object containing metadata such as column names and units
+    lat_keys : list
+        Latitude key names
+    lon_keys : list
+        Longitude key names
+
+    """
+    # Define the xarray coordinate dimensions and lat/lon keys
+    xcoords = {('time', 'gdlat', 'glon', 'kindat', 'kinst'):
+               ['gdalt', 'tec', 'dtec'],
+               ('time', ): ['year', 'month', 'day', 'hour', 'min',
+                            'sec', 'ut1_unix', 'ut2_unix', 'recno']}
+    lat_keys = ['gdlat']
+    lon_keys = ['glon']
+
+    # Load the specified data
+    data, meta = general.load(fnames, 'vtec', '', xarray_coords=xcoords)
+
+    # Fix the units for tec and dtec
+    meta['tec'] = {meta.labels.units: 'TECU', meta.labels.min_val: 0.0,
+                   meta.labels.max_val: np.nan}
+    meta['dtec'] = {meta.labels.units: 'TECU', meta.labels.min_val: 0.0,
+                    meta.labels.max_val: np.nan}
+
+    # Get the maximum and minimum values for altitude, along with lat/lon keys
+    meta['gdalt'] = {meta.labels.min_val: 0.0, meta.labels.max_val: np.nan}
+
+    return data, meta, lat_keys, lon_keys
+
+
+def load_site(fnames):
+    """Load the GNSS TEC site data.
+ + Parameters + ---------- + fnames : list + List of filenames + + Returns + ------- + data : xarray.Dataset + Object containing satellite data + meta : pysat.Meta + Object containing metadata such as column names and units + lat_keys : list + Latitude key names + lon_keys : list + Longitude key names + + """ + # Define the xarray coordinate dimensions and lat/lon keys + xcoords = {('time', 'gps_site'): ['gdlatr', 'gdlonr']} + lat_keys = ['gdlatr'] + lon_keys = ['gdlonr'] + + # Load the specified data + data, meta = general.load(fnames, 'site', '', xarray_coords=xcoords) + + return data, meta, lat_keys, lon_keys + + +def load_los(fnames, los_method, los_value, gnss_network='all'): + """Load the GNSS slant TEC data. + + Parameters + ---------- + fnames : list + List of filenames + los_method : str + For 'los' tag only, load data for a unique GNSS receiver site ('site') + or at a unique time ('time') + los_value : str or dt.datetime + For 'los' tag only, load data at this unique site or time + gnss_network : bool + Limit data by GNSS network, if not 'all'. 
Currently supports 'all', + 'gps', and 'glonass' (default='all') + + Returns + ------- + data : xarray.Dataset + Object containing satellite data + meta : pysat.Meta + Object containing metadata such as column names and units + lat_keys : list + Latitude key names + lon_keys : list + Longitude key names + + """ + # Define the xarray coordinate dimensions and lat/lon keys + xcoords = {('time', 'gps_site', 'sat_id', 'kindat', 'kinst'): + ['pierce_alt', 'los_tec', 'dlos_tec', 'tec', 'azm', 'elm', + 'gdlat', 'glon', 'rec_bias', 'drec_bias'], + ('time', ): ['year', 'month', 'day', 'hour', 'min', + 'sec', 'ut1_unix', 'ut2_unix', 'recno'], + ('time', 'sat_id', ): ['gnss_type'], + ('time', 'gps_site', ): ['gdlatr', 'gdlonr']} + lat_keys = ['gdlatr', 'gdlat'] + lon_keys = ['gdlonr', 'glon'] + + # Sort and test the desired filenames by file format + load_file_types = general.sort_file_formats(fnames) + + for ftype in load_file_types.keys(): + if ftype != 'hdf5' and len(load_file_types[ftype]) > 0: + pysat.logger.warning( + 'unable to load non-HDF5 slant TEC files: {:}'.format( + load_file_types[ftype])) + + # Initalize the meta data + meta = pysat.Meta() + + # Load the data using the desired method + if los_method.lower() == 'site': + sel_key = 'gps_site' + + # Convert the site to bytes + los_value = np.bytes_(los_value) + elif los_method.lower() == 'time': + sel_key = 'ut1_unix' + + # Convert the input datetime to UNIX seconds + los_value = (los_value - dt.datetime(1970, 1, 1)).total_seconds() + else: + raise ValueError('unsupported selection type: {:}'.format(los_method)) + + # Load the data by desired method + data = list() + labels = list() + for fname in load_file_types['hdf5']: + with h5py.File(fname, 'r') as fin: + sel_arr = fin['Data']['Table Layout'][sel_key] + sel_mask = sel_arr == los_value + + if gnss_network.lower() != 'all': + # Redefine the selection mask to include network as well + gnss_val = '{:8s}'.format(gnss_network.upper()) + try: + net_arr = 
fin['Data']['Table Layout']['gnss_type'][sel_mask] + sel_mask[sel_mask] = net_arr.astype(str) == gnss_val + except ValueError: + # If the 'gnss_type' is not available, all data is GPS + if gnss_network.lower() != 'gps': + sel_mask[sel_mask] = False + + # Save the output for the desired slice + if sel_mask.any(): + data.extend(list(fin['Data']['Table Layout'][sel_mask])) + + # Save the meta data + labels = general.update_meta_with_hdf5(fin, meta) + + # If this is time selection, only need to load from one file + if len(data) > 0: + break + + # Load data into frame, with labels from metadata + data = pds.DataFrame.from_records(data, columns=labels) + + if not data.empty: + # Enforce lowercase variable names + data.columns = [item.lower() for item in data.columns] + + # Convert the data to an xarray Dataset + time_ind = general.build_madrigal_datetime_index(data) + else: + time_ind = None + + # Convert the output to xarray + data = general.convert_pandas_to_xarray(xcoords, data, time_ind) + + return data, meta, lat_keys, lon_keys + + +def get_los_receiver_sites(los_fnames): + """Retrieve an array of unique receiver names for the desired LoS files. + + Parameters + ---------- + los_fnames : list + List of filenames + + Returns + ------- + sites : np.array + Array of strings containing GNSS receiver names with data in the files + + """ + los_fnames = pysat.utils.listify(los_fnames) + sites = list() + + # Get all of the potential sites + for fname in los_fnames: + with h5py.File(fname, 'r') as fin: + site_arr = fin['Data']['Table Layout']['gps_site'] + sites.extend(list(site_arr.astype(str))) + + # Find the unique sites + sites = np.unique(sites) + return sites + + +def get_los_times(los_fnames): + """Retrieve an array of unique times for the desired LoS files. 
+
+    Parameters
+    ----------
+    los_fnames : list
+        List of filenames
+
+    Returns
+    -------
+    all_times : np.array
+        Array of datetime objects with data in the files
+
+    """
+    los_fnames = pysat.utils.listify(los_fnames)
+    all_times = list()
+
+    # Get all of the potential times
+    for fname in los_fnames:
+        with h5py.File(fname, 'r') as fin:
+            time_arr = fin['Data']['Table Layout']['ut1_unix']
+
+            # Convert from unix time to a datetime object
+            all_times.extend([dt.datetime(1970, 1, 1)
+                              + dt.timedelta(seconds=int(time_val))
+                              for time_val in time_arr])
+
+    return all_times
diff --git a/pysatMadrigal/instruments/methods/jro.py b/pysatMadrigal/instruments/methods/jro.py
index 880f7bbf..4d8dcf3e 100644
--- a/pysatMadrigal/instruments/methods/jro.py
+++ b/pysatMadrigal/instruments/methods/jro.py
@@ -1,9 +1,12 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 # Full license can be found in License.md
 # Full author list can be found in .zenodo.json file
 # DOI:10.5281/zenodo.3824979
+#
+# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
+# unlimited.
 # ----------------------------------------------------------------------------
-# -*- coding: utf-8 -*-
 """Methods supporting the Jicamarca Radio Observatory (JRO) platform."""
 
 import numpy as np
diff --git a/pysatMadrigal/instruments/ngdc_ae.py b/pysatMadrigal/instruments/ngdc_ae.py
new file mode 100644
index 00000000..e711fe55
--- /dev/null
+++ b/pysatMadrigal/instruments/ngdc_ae.py
@@ -0,0 +1,296 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# Full license can be found in License.md
+# Full author list can be found in .zenodo.json file
+# DOI:10.5281/zenodo.3824979
+#
+# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is
+# unlimited.
+# ----------------------------------------------------------------------------
+"""Supports access to taped data of AE from the World Data Center A (Boulder).
+ +Properties +---------- +platform + 'ngdc' +name + 'ae' +tag + None supported +inst_id + None supported + +Note +---- +Please provide name (user) and email (password) when downloading data with this +routine. + +Warnings +-------- +The entire data set (1 Jan 1978 through 31 Dec 1987) is provided in a single +file on Madrigal. + +Examples +-------- +:: + + + import datetime as dt + import pysat + import pysatMadrigal as py_mad + + # Download AE data from Madrigal + aei = pysat.Instrument(inst_module=py_mad.instruments.ngdc_ae) + aei.download(start=py_mad.instruments.ngdc_ae.madrigal_start, + user='Firstname+Lastname', password='email@address.com') + aei.load(date=dt.datetime(1981, 1, 1)) + +""" + +import datetime as dt +import functools +import numpy as np +import pandas as pds + +import h5py +import pysat + +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'ngdc' +name = 'ae' +tags = {'': ''} +inst_ids = {'': list(tags.keys())} +pandas_format = True + +# Madrigal tags and limits +madrigal_inst_code = 211 +madrigal_tag = {'': {'': "30008"}} +madrigal_start = dt.datetime(1978, 1, 1) +madrigal_end = dt.datetime(1988, 1, 1) + +# Local attributes +# +# Need a way to get the filename strings for a particular instrument unless +# wildcards start working +supported_tags = { + inst_id: {tag: general.madrigal_file_format_str(madrigal_inst_code, + verbose=False) + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} +remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument test attributes + +_test_dates = {inst_id: {tag: madrigal_start for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_test_download = {inst_id: {tag: True for tag in inst_ids[inst_id]} + for inst_id 
in inst_ids.keys()} +_clean_warn = { + inst_id: {tag: {'dusty': [('logger', 'WARN', + "'dusty' and 'clean' levels are the same", + 'dusty')]} + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self): + """Initialize the Instrument object in support of Madrigal access.""" + # Set the standard pysat attributes + self.acknowledgements = general.cedar_rules() + self.references = ''.join(['Davis, T. Neil and Masahisa Sugiura. “Auroral', + ' electrojet activity index AE and its ', + 'universal time variations.” Journal of ', + 'Geophysical Research 71 (1966): 785-801.']) + + # Remind the user of the Rules of the Road + pysat.logger.info(self.acknowledgements) + return + + +def clean(self): + """Raise warning that cleaning is not possible for general data. + + Note + ---- + Supports 'clean', 'dusty', 'dirty' in the sense that all + levels use the flag to clean data the same way. + 'None' is also supported as it signifies no cleaning. + + Routine is called by pysat, and not by the end user directly. 
+ + """ + + warned = False + for dvar in self.variables: + if self.meta[dvar, self.meta.labels.units].find('nT') >= 0: + # The 'clean', 'dusty', and 'dirty' levels all replace the missing + # parameter value of -32766 with NaN + mask = self[dvar] == self.meta[dvar, self.meta.labels.fill_val] + self[dvar][mask] == np.nan + self.meta[dvar] = {self.meta.labels.fill_val: np.nan} + + if self.clean_level in ['clean', 'dusty']: + if self.clean_level == 'dusty' and not warned: + pysat.logger.warning( + "The NGDC AE 'dusty' and 'clean' levels are the same.") + warned = True + + # The 'clean' and 'dusty' levels replace the parameter error + # value of -32766 with NaN + self[dvar][self[dvar] == -32766] = np.nan + + return + + +# ---------------------------------------------------------------------------- +# Instrument functions +# +# Use the default Madrigal and pysat methods +file_cadence = madrigal_end - madrigal_start +two_digit_year_break = 50 + +# Set the download routine +download = functools.partial(general.download, + inst_code=str(madrigal_inst_code), + kindat=madrigal_tag[''][''], file_type='hdf5') + +# Set the list routine +list_files = functools.partial(general.list_files, + supported_tags=supported_tags, + file_cadence=file_cadence, + two_digit_year_break=two_digit_year_break) + +# Set list_remote_files routine +list_remote_files = functools.partial(general.list_remote_files, + supported_tags=remote_tags, + inst_code=madrigal_inst_code, + kindats=madrigal_tag, + two_digit_year_break=two_digit_year_break) + + +def load(fnames, tag='', inst_id=''): + """Load the NGDC AE data. + + Parameters + ----------- + fnames : list + List of filenames + tag : str + tag name used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. (default='') + inst_id : str + Instrument ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. 
(default='') + + Returns + -------- + data : pds.DataFrame + Object containing satellite data + meta : pysat.Meta + Object containing metadata such as column names and units + + Raises + ------ + ValueError + Unexpected time variable names + + """ + # Initialize the output + meta = pysat.Meta() + labels = [] + data = None + fill_val = -32767 + notes = "".join(["Assumed parameters error values are assigned a value ", + "of -32766 for clean levels of 'dirty' or 'none'"]) + + # Cycle through all the filenames, getting the desired start and stop times + fstart = None + fstop = None + for fname_date in fnames: + # Split the date from the filename + fname = fname_date[:-11] + fdate = dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d') + fstop = fdate + + if fstart is None: + fstart = fdate + + fstop += dt.timedelta(days=1) + + # There is only one file for this Instrument + with h5py.File(fname, 'r') as filed: + file_data = filed['Data']['Table Layout'] + file_meta = filed['Metadata']['Data Parameters'] + + # Load available info into pysat.Meta if this is the first file + if len(labels) == 0: + for item in file_meta: + name_string = item[0].decode('UTF-8') + unit_string = item[3].decode('UTF-8') + desc_string = item[1].decode('UTF-8') + labels.append(name_string) + + # Only update metadata if necessary + if name_string.lower() not in meta: + meta_dict = {meta.labels.name: name_string, + meta.labels.units: unit_string, + meta.labels.desc: desc_string} + + if unit_string.find('nT') >= 0: + # Fill and error values only apply to index values + meta_dict[meta.labels.fill_val] = fill_val + meta_dict[meta.labels.notes] = notes + + meta[name_string.lower()] = meta_dict + + # Add additional metadata notes. 
Custom attributes attached to + # meta are attached to corresponding Instrument object when + # pysat receives data and meta from this routine + for key in filed['Metadata']: + if key != 'Data Parameters': + setattr(meta, key.replace(' ', '_'), filed['Metadata'][key][:]) + + # Extended processing is the same for simple and HDF5 files + # + # Construct datetime index from times + time_keys = np.array(['year', 'month', 'day', 'hour', 'hm', 'hmi']) + lower_labels = [ll.lower() for ll in labels] + time_keys = [key for key in time_keys if key not in lower_labels] + if len(time_keys) > 0: + raise ValueError(' '.join(["unable to construct time index, ", + "missing {:}".format(time_keys)])) + + # Get the date information + year = file_data[:]['year'] + month = file_data[:]['month'] + day = file_data[:]['day'] + fdate = pysat.utils.time.create_datetime_index(year=year, month=month, + day=day) + + # Get the data mask + dmask = (fdate >= fstart) & (fdate < fstop) + + # Construct the time index + hour = file_data[dmask]['hour'] + minute = (file_data[dmask]['hm'] / 100.0 - hour) * 100.0 + uts = 3600.0 * hour + 60.0 * minute + file_data[dmask]['hmi'] + + tindex = pysat.utils.time.create_datetime_index( + year=year[dmask], month=month[dmask], day=day[dmask], uts=uts) + + # Load the data into a pandas DataFrame + data = pds.DataFrame.from_records(file_data[dmask], columns=labels, + index=tindex) + + # Ensure that data is at least an empty Dataset + if data is None: + data = pds.DataFrame(dtype=np.float64) + + return data, meta diff --git a/pysatMadrigal/instruments/omni2_imf.py b/pysatMadrigal/instruments/omni2_imf.py new file mode 100644 index 00000000..5df6e661 --- /dev/null +++ b/pysatMadrigal/instruments/omni2_imf.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. 
Distribution is +# unlimited. +# ---------------------------------------------------------------------------- +"""Supports access to OMNI 2 IMF data archived at Madrigal. + +Properties +---------- +platform + 'omni2' +name + 'imf' +tag + None supported +inst_id + None supported + +Note +---- +Please provide name (user) and email (password) when downloading data with this +routine. + +Glenn Campbell and Bill Rideout completely rebuilt the Madrigal interplanetary +magnetic field data using data from: +ftp://nssdcftp.gsfc.nasa.gov/spacecraft_data/omni/. The old file had +originally come from Cedar and had gaps even in places where there was data +available. This new Madrigal file is based on the Omni 2 data set, described +at http://nssdc.gsfc.nasa.gov/omniweb/. (4 May 2004, brideout@haystack.mit.edu) + +The OMNI data may be directly downloaded using pysatNASA and is now described +at: https://omniweb.gsfc.nasa.gov/html/ow_data.html + +Warnings +-------- +The entire data set (27 Nov 1963 through 3 Jun 2019) is provided in a single +file on Madrigal. The download method will break this file up by year for +easier access. 
+ +Examples +-------- +:: + + + import datetime as dt + import pysat + import pysatMadrigal as py_mad + + # Download IMF data from Madrigal + imf = pysat.Instrument(inst_module=py_mad.instruments.omni2_imf) + imf.download(start=py_mad.instruments.omni2_imf.madrigal_start, + user='Firstname+Lastname', password='email@address.com') + imf.load(date=dt.datetime(1981, 1, 1)) + +""" + +import datetime as dt +import functools + +import pysat + +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'omni2' +name = 'imf' +tags = {'': ''} +inst_ids = {'': list(tags.keys())} +pandas_format = True + +# Madrigal tags and limits +madrigal_inst_code = 120 +madrigal_tag = {'': {'': "30012"}} +madrigal_start = dt.datetime(1963, 11, 27) +madrigal_end = dt.datetime(2019, 6, 4) + +# Local attributes +# +# Need a way to get the filename strings for a particular instrument unless +# wildcards start working +supported_tags = { + inst_id: {tag: general.madrigal_file_format_str(madrigal_inst_code, + verbose=False) + for tag in inst_ids[inst_id]} for inst_id in inst_ids.keys()} +remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument test attributes + +_test_dates = {inst_id: {tag: madrigal_start for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_test_download = {inst_id: {tag: True for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} +_clean_warn = {inst_id: {tag: {clvl: [('logger', 'WARN', + "No cleaning available", clvl)] + for clvl in ['clean', 'dusty', 'dirty']} + for tag in inst_ids[inst_id]} + for inst_id in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self): + """Initialize the Instrument 
object in support of Madrigal access.""" + # Set the standard pysat attributes + self.acknowledgements = ''.join([general.cedar_rules(), '\nFor full ', + 'acknowledgement info, please see: ', + 'https://omniweb.gsfc.nasa.gov/html/', + 'citing.html']) + self.references = ' '.join(('J.H. King and N.E. Papitashvili, Solar', + 'wind spatial scales in and comparisons', + 'of hourly Wind and ACE plasma and', + 'magnetic field data, J. Geophys. Res.,', + 'Vol. 110, No. A2, A02209,', + '10.1029/2004JA010649.')) + + # Remind the user of the Rules of the Road + pysat.logger.info(self.acknowledgements) + return + + +def clean(self): + """Raise warning that cleaning is not needed for the OMNI2 data. + + Note + ---- + Supports 'clean', 'dusty', 'dirty' in the sense that it prints + a message noting there is no cleaning. + 'None' is also supported as it signifies no cleaning. + + Routine is called by pysat, and not by the end user directly. + + """ + pysat.logger.warning("No cleaning available for the collected Omni 2 IMF") + + return + + +# ---------------------------------------------------------------------------- +# Instrument functions +# +# Use the default Madrigal and pysat methods +file_cadence = madrigal_end - madrigal_start +two_digit_year_break = 50 + +# Set the download routine +download = functools.partial(general.download, + inst_code=str(madrigal_inst_code), + kindat=madrigal_tag['']['']) + +# Set the list routine +list_files = functools.partial(general.list_files, + supported_tags=supported_tags, + file_cadence=file_cadence, + two_digit_year_break=two_digit_year_break) + +# Set list_remote_files routine +list_remote_files = functools.partial(general.list_remote_files, + supported_tags=remote_tags, + inst_code=madrigal_inst_code, + kindats=madrigal_tag, + two_digit_year_break=two_digit_year_break) + + +def load(fnames, tag='', inst_id=''): + """Load the OMNI2 IMF data. 
+ + Parameters + ----------- + fnames : list + List of filenames + tag : str + tag name used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. (default='') + inst_id : str + Instrument ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. (default='') + + Returns + -------- + data : pds.DataFrame + Object containing IMF data + meta : pysat.Meta + Object containing metadata such as column names and units + + Raises + ------ + ValueError + Unexpected time variable names + + """ + # Cycle through all the filenames, getting the desired start and stop times + fstart = None + fstop = None + for fname_date in fnames: + # Split the date from the filename + fname = fname_date[:-11] + fdate = dt.datetime.strptime(fname_date[-10:], '%Y-%m-%d') + fstop = fdate + + if fstart is None: + fstart = fdate + + fstop += dt.timedelta(days=1) + + # There is only one file for this Instrument + data, meta = general.load([fname], tag=tag, inst_id=inst_id) + + # Test to see if there is data beyond the expected file end + if data.index[-1] > madrigal_end: + pysat.logger.critical(''.join(['There is data beyond ', + '{:}'.format(madrigal_end), ' in the ', + 'OMNI2 IMF file, please notify the ', + 'pysatMadrigal developers so that they ', + 'can update this Instrument.'])) + + # Select the data for the desired time period + data = data[fstart:fstop] + + return data, meta diff --git a/pysatMadrigal/tests/test_instruments.py b/pysatMadrigal/tests/test_instruments.py index d3ed072c..20277db1 100644 --- a/pysatMadrigal/tests/test_instruments.py +++ b/pysatMadrigal/tests/test_instruments.py @@ -1,12 +1,22 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. 
# ---------------------------------------------------------------------------- """Unit tests for the Instruments.""" +import datetime as dt +import os +import pathlib + # Import the test classes from pysat +import pysat from pysat.tests.classes import cls_instrument_library as clslib +from pysat.utils.testing import eval_bad_input import pysatMadrigal @@ -34,3 +44,43 @@ class TestInstruments(clslib.InstLibTests): instrument test class. """ + + +class TestInstrumentLoadError(object): + """Class for unit testing errors raised when loading data.""" + + def setup_method(self): + """Run before every method to create a clean testing setup.""" + self.inst_kwargs = [{'inst_module': pysatMadrigal.instruments.gnss_tec, + 'tag': 'los', 'los_method': 'site'}] + self.load_time = dt.datetime(2001, 1, 1) + self.fake_file = '' + return + + def teardown_method(self): + """Run after every method to clean up previous testing.""" + if os.path.isfile(self.fake_file): + os.remove(self.fake_file) + + del self.inst_kwargs, self.load_time, self.fake_file + return + + def test_bad_los_value(self): + """Test ValueError when the `los_value` is omitted.""" + inst = pysat.Instrument(**self.inst_kwargs[0]) + + # Ensure a file is available + if self.load_time not in inst.files.files.keys(): + self.fake_file = os.path.join( + inst.files.data_path, + self.inst_kwargs[0]['inst_module'].supported_tags[inst.inst_id][ + inst.tag].format(file_type='hdf5').format( + year=self.load_time.year, month=self.load_time.month, + day=self.load_time.day, version=1)) + pysat.utils.files.check_and_make_path(inst.files.data_path) + pathlib.Path(self.fake_file).touch() + inst = pysat.Instrument(**self.inst_kwargs[0]) + + eval_bad_input(inst.load, ValueError, "must specify a valid", + input_kwargs={'date': self.load_time}) + return diff --git a/pysatMadrigal/tests/test_methods_dmsp.py b/pysatMadrigal/tests/test_methods_dmsp.py index cc0f1bc2..1d0efd7a 100644 --- a/pysatMadrigal/tests/test_methods_dmsp.py +++ 
b/pysatMadrigal/tests/test_methods_dmsp.py @@ -1,9 +1,13 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- -"""Test methods for `pysatMadrigal.instruments.methods.gnss`.""" +"""Test methods for `pysatMadrigal.instruments.methods.dmsp`.""" import logging import numpy as np diff --git a/pysatMadrigal/tests/test_methods_general.py b/pysatMadrigal/tests/test_methods_general.py index 5fc36eb4..dffee573 100644 --- a/pysatMadrigal/tests/test_methods_general.py +++ b/pysatMadrigal/tests/test_methods_general.py @@ -1,7 +1,11 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. 
# ---------------------------------------------------------------------------- """Unit tests for the general instrument methods.""" @@ -17,6 +21,7 @@ import netCDF4 as nc import pandas as pds import pysat +from pysat.utils.testing import eval_bad_input import pytest import xarray as xr @@ -42,6 +47,26 @@ def test_acknowledgements(self): assert self.out.find("CEDAR 'Rules of the Road'") >= 0 return + def test_sort_file_format(self, caplog): + """Test successful sorting of file names by extension.""" + # Get the output and raise the logging warning + with caplog.at_level(logging.WARN, logger='pysat'): + self.out = general.sort_file_formats(['test.hdf5', 'test.netCDF4', + 'test.simple.gz', 'test.bad']) + + # Evaluate the output + assert isinstance(self.out, dict) + pysat.utils.testing.assert_lists_equal(list(self.out.keys()), + ['hdf5', 'netCDF4', 'simple']) + + # Evaluate the logger warning + # TODO(#101) Use pysat warnings test + assert len(caplog.records) == 1, "unexpected number of warnings" + assert caplog.records[0].levelname == "WARNING" + assert caplog.records[0].message.find( + "file with unknown file type") >= 0 + return + @pytest.mark.parametrize("xarray_coords", [None, ["lat"]]) def test_empty_load(self, xarray_coords): """Test the general load function with no data files.""" @@ -244,11 +269,9 @@ def test_check_madrigal_params_no_code(self, inst_code): del self.kwargs['kindats'], self.kwargs['supported_tags'] self.kwargs['inst_code'] = inst_code - # Get the expected error message and evaluate it - with pytest.raises(ValueError) as verr: - general._check_madrigal_params(**self.kwargs) - - assert str(verr).find("Unknown Madrigal instrument code") >= 0 + eval_bad_input(general._check_madrigal_params, ValueError, + "Unknown Madrigal instrument code", + input_kwargs=self.kwargs) return @pytest.mark.parametrize("bad_val", [None, 17, False, 12.34]) @@ -269,10 +292,9 @@ def test_check_madrigal_params_bad_input(self, bad_val, test_key): self.kwargs[test_key] = 
bad_val # Get the expected error message and evaluate it - with pytest.raises(ValueError) as verr: - general._check_madrigal_params(**self.kwargs) - - assert str(verr).find("The madrigal database requries a username") >= 0 + eval_bad_input(general._check_madrigal_params, ValueError, + "The madrigal database requries a username", + input_kwargs=self.kwargs) return @pytest.mark.parametrize("del_val", ['kindats', 'supported_tags']) @@ -289,19 +311,17 @@ def test_list_remote_files_bad_kwargs(self, del_val): del self.kwargs[del_val] # Get the expected error message and evaluate it - with pytest.raises(ValueError) as verr: - general.list_remote_files('testing', 'tag', **self.kwargs) - - assert str(verr).find("Must supply supported_tags and kindats") >= 0 + eval_bad_input(general.list_remote_files, ValueError, + "Must supply supported_tags and kindats", + input_args=['testing', 'tag'], input_kwargs=self.kwargs) return def test_list_remote_files_bad_tag_inst_id(self): """Test that an error is thrown if None is passed through.""" # Get the expected error message and evaluate it - with pytest.raises(KeyError) as kerr: - general.list_remote_files('testing', 'not_tag', **self.kwargs) - - assert str(kerr).find('not_tag') >= 0 + eval_bad_input(general.list_remote_files, KeyError, "not_tag", + input_args=['testing', 'not_tag'], + input_kwargs=self.kwargs) return @pytest.mark.parametrize("in_key, in_val, test_verr", [ @@ -323,10 +343,9 @@ def test_download_valueerror(self, in_key, in_val, test_verr): del self.kwargs['supported_tags'], self.kwargs['kindats'] self.kwargs[in_key] = in_val - with pytest.raises(ValueError) as verr: - general.download([], **self.kwargs) - - assert str(verr).find(test_verr) >= 0 + # Get the expected error message and evaluate it + eval_bad_input(general.download, ValueError, test_verr, + input_args=[[]], input_kwargs=self.kwargs) return def test_get_remote_filenames_bad_date_array(self): @@ -334,10 +353,20 @@ def 
test_get_remote_filenames_bad_date_array(self): del self.kwargs['supported_tags'], self.kwargs['kindats'] self.kwargs['date_array'] = [] - with pytest.raises(ValueError) as verr: - general.get_remote_filenames(**self.kwargs) + # Get the expected error message and evaluate it + eval_bad_input(general.get_remote_filenames, ValueError, + "unknown date_array supplied", input_kwargs=self.kwargs) + return - assert str(verr).find("unknown date_array supplied") >= 0 + # TODO(#102) Expand testing of pandas to xarray method below + def test_convert_pandas_to_xarray_bad_data_vars(self): + """Test raises ValueError for unexpected date_array input.""" + self.kwargs = [{('time', ): ['bad_var']}, pds.DataFrame([0]), + pds.DatetimeIndex([dt.datetime(2001, 1, 1)])] + + # Get the expected error message and evaluate it + eval_bad_input(general.convert_pandas_to_xarray, ValueError, + "All data variables", input_args=self.kwargs) return @@ -502,8 +531,17 @@ def teardown_method(self): # Remove the temporary directory and file for tfile in self.temp_files: if os.path.isfile(tfile): - os.remove(tfile) - self.data_path.cleanup() + try: + os.remove(tfile) + except PermissionError: + pass # Windows thinks files are always open + + try: + self.data_path.cleanup() + except Exception: + # TODO(#https://github.com/pysat/pysat/issues/974): Windows fix + # until `ignore_cleanup_errors=True` can be used (3.10 is lowest) + pass del self.data_path, self.temp_files, self.xarray_coords, self.data del self.meta @@ -601,6 +639,9 @@ def test_load_netcdf(self, nfiles): # Evaluate the loaded data self.eval_dataset_meta_output() + # Close for Windows OS + self.data.close() + return def test_load_netcdf_extra_xarray_coord(self): @@ -617,6 +658,9 @@ def test_load_netcdf_extra_xarray_coord(self): # Evaluate the loaded data self.eval_dataset_meta_output() + # Close for Windows OS + self.data.close() + return diff --git a/pysatMadrigal/tests/test_methods_gnss.py b/pysatMadrigal/tests/test_methods_gnss.py index 
cbe9d728..432d732f 100644 --- a/pysatMadrigal/tests/test_methods_gnss.py +++ b/pysatMadrigal/tests/test_methods_gnss.py @@ -1,12 +1,20 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- """Test methods for `pysatMadrigal.instruments.methods.gnss`.""" +import datetime as dt +import logging import pytest +from pysat.utils.testing import eval_bad_input + from pysatMadrigal.instruments.methods import gnss @@ -31,3 +39,50 @@ def test_ref_output(self, func, comp_str, in_args): self.out = getattr(gnss, func)(*in_args) assert self.out.find(comp_str) >= 0 return + + +class TestGNSSBadLoad(object): + """Test GNSS load warnings and errors.""" + + def setup_method(self): + """Run before every method to create a clean testing setup.""" + self.bad_fnames = ['los_20230101.simple.gz', 'los_20230102.netCDF4'] + return + + def teardown_method(self): + """Run after every method to clean up previous testing.""" + del self.bad_fnames + return + + def test_bad_file_type_warning(self, caplog): + """Test logger warning for unsupported file types loading LoS data.""" + + # Get the output and raise the logging warning + with caplog.at_level(logging.WARN, logger='pysat'): + gnss.load_los(self.bad_fnames, "site", "zzon") + + # Test the logger warning + # TODO(#101) Use pysat eval warnings + assert len(caplog.records) == 2, "unexpected number of warnings" + + for record in caplog.records: + assert record.levelname == "WARNING" + assert record.message.find("unable to load non-HDF5 slant TEC") >= 0 + return + + def test_bad_sel_type(self): + """Test ValueError raised for an unknown LoS down-selection type.""" + + eval_bad_input(gnss.load_los, ValueError, "unsupported selection type", + 
input_args=[self.bad_fnames, "bad_sel", "bad_val"]) + return + + def test_empty_los_load(self): + """Test the returned dataset is empty for a LoS load.""" + data, meta, lats, lons = gnss.load_los(self.bad_fnames, "time", + dt.datetime(2023, 1, 1)) + + assert len(data.dims.keys()) == 0 + assert len(lats) == 2 + assert len(lons) == 2 + return diff --git a/pysatMadrigal/tests/test_methods_jro.py b/pysatMadrigal/tests/test_methods_jro.py index b3278d2d..273ed68d 100644 --- a/pysatMadrigal/tests/test_methods_jro.py +++ b/pysatMadrigal/tests/test_methods_jro.py @@ -1,7 +1,11 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- """Test methods for `pysatMadrigal.instruments.methods.jro`.""" @@ -42,8 +46,8 @@ class TestJROCalcLoc(object): def setup_method(self): """Run before every method to create a clean testing setup.""" - self.inst = pysat.Instrument('pysat', 'testing_xarray', num_samples=100) - self.stime = pysat.instruments.pysat_testing_xarray._test_dates[''][''] + self.inst = pysat.Instrument('pysat', 'ndtesting', num_samples=100) + self.stime = pysat.instruments.pysat_ndtesting._test_dates[''][''] # Set the hard-coded values self.az = 206.0 @@ -62,7 +66,7 @@ def teardown_method(self): return def transform_testing_to_jro(self, azel_type=''): - """Alter `testing_xarray` to mirror the JRO-ISR data.""" + """Alter `ndtesting` to mirror the JRO-ISR data.""" # Load the data self.inst.load(date=self.stime) diff --git a/pysatMadrigal/tests/test_utils_coords.py b/pysatMadrigal/tests/test_utils_coords.py index 28ec8802..2664d8e0 100644 --- a/pysatMadrigal/tests/test_utils_coords.py +++ b/pysatMadrigal/tests/test_utils_coords.py @@ -1,3 +1,12 @@ +#!/usr/bin/env python +# -*- coding: utf-8 
-*- +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. +# ---------------------------------------------------------------------------- """Tests for the coordinate conversion functions.""" import numpy as np diff --git a/pysatMadrigal/utils/coords.py b/pysatMadrigal/utils/coords.py index e5db684f..d95ba582 100644 --- a/pysatMadrigal/utils/coords.py +++ b/pysatMadrigal/utils/coords.py @@ -1,7 +1,11 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Full license can be found in License.md # Full author list can be found in .zenodo.json file # DOI:10.5281/zenodo.3824979 +# +# DISTRIBUTION STATEMENT A: Approved for public release. Distribution is +# unlimited. # ---------------------------------------------------------------------------- """Coordinate transformation functions.""" diff --git a/pysatMadrigal/version.txt b/pysatMadrigal/version.txt deleted file mode 100644 index 6e8bf73a..00000000 --- a/pysatMadrigal/version.txt +++ /dev/null @@ -1 +0,0 @@ -0.1.0 diff --git a/requirements.txt b/requirements.txt index 10b79155..063b76f0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,5 +3,5 @@ madrigalWeb>=2.6 numpy packaging pandas -pysat>=3.0.3 +pysat>=3.1.0 xarray diff --git a/setup.cfg b/setup.cfg index 77550243..aa9e557d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,68 +1,9 @@ [metadata] name = pysatMadrigal -version = file: pysatMadrigal/version.txt -url = https://github.com/pysat/pysatMadrigal -author = Angeline G. Burrell, et al. 
-author_email = pysat.developers@gmail.com -description = 'Madrigal instrument support for the pysat ecosystem' -keywords = - pysat - ionosphere - Madrigal - CEDAR - thermosphere - GPS - GNSS - TEC - Jicamarca - DMSP - ISR - Incoherent scatter radar -classifiers = - Development Status :: 4 - Beta - Topic :: Scientific/Engineering :: Physics - Topic :: Scientific/Engineering :: Atmospheric Science - Intended Audience :: Science/Research - License :: OSI Approved :: BSD License - Natural Language :: English - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Operating System :: MacOS :: MacOS X - Operating System :: POSIX :: Linux -license_file = LICENSE -long_description = file: README.md -long_description_content_type = text/markdown - -[options] -python_requires = >= 3.6 -setup_requires = setuptools >= 38.6; pip >= 10 -include_package_data = True -zip_safe = False -packages = find: -install_requires = h5py - madrigalWeb - numpy - packaging - pandas - pysat - xarray - -[coverage:report] -omit = */instruments/templates/* +version = 0.2.0 [flake8] max-line-length = 80 ignore = W503 - -[tool:pytest] -markers = - all_inst: tests all instruments - download: tests for downloadable instruments - no_download: tests for instruments without download support - load_options: tests for instruments with additional options - first: first tests to run - second: second tests to run + D200 + D202 diff --git a/setup.py b/setup.py deleted file mode 100644 index 2e9b3e74..00000000 --- a/setup.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Copyright (C) 2020, Authors -# Full license can be found in License.md and AUTHORS.md -# ----------------------------------------------------------------------------- -"""Package setup.""" - -from setuptools import setup 
- - -# Run setup. Setuptools will look for parameters in [metadata] section of -# setup.cfg -setup() diff --git a/test_requirements.txt b/test_requirements.txt index fc59dc85..3bfdc26d 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,4 +1,4 @@ -coveralls<3.3 +coveralls flake8 flake8-docstrings hacking>=1.0 @@ -7,4 +7,4 @@ numpydoc pytest-cov pytest-ordering sphinx -sphinx_rtd_theme +sphinx_rtd_theme>=1.2.2,<2.0.0