diff --git a/.github/workflows/stats.yml b/.github/workflows/stats.yml index 4bd12a454..6cde36c2f 100644 --- a/.github/workflows/stats.yml +++ b/.github/workflows/stats.yml @@ -17,9 +17,9 @@ jobs: name: Summary of instrument libraries steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} diff --git a/CHANGELOG.md b/CHANGELOG.md index 2352a5ca6..30d0d79d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,80 @@ Change Log All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/). +[3.1.0] - 2023-06-02 +-------------------- +* New Features + * Added a utility to ensure two xarray Datasets can be concatenated, and + incorporated this utility into the Instrument file loading + * Added unit tests for different file cadences in the Instrument class + * Added `to_inst` method to the Constellation class + * Added `export_pysat_info` kwarg to `to_netcdf` routines to select whether + pysat instrument info is written to files + * Added Constellation class examples to the docs tutorial + * Added links to the project standards repository to the docs + * Improved formatting of custom kwargs when running `print` on an instrument + * Added a core utility to update fill values consistently in the data and + metadata + * Adapted `check_and_make_path` to treat an empty path as the current dir + * Added `meta_kwargs` attribute and kwarg to Instrument, allowing full custom + specification of the Meta class on instantiation + * Expanded MetaLabels type defaults for 'max_val', 'min_val', and 'fill_val' + to include more common data types + * Added `data_types` input to Meta and certain MetaLabels methods, allowing + the default values to be set to the specified data type when multiple types + are allowed, ensuring these are updated when adding new data to an Instrument + * Added `_update_label_types` to MetaLabels, expanding the Python float/int + types to include all numpy float/int types + * Added `strict_dim_check` for loading xarray Datasets through netCDF + * Added `combine_by_coords` kwarg to `io.load_netcdf` for use on multi-file + xarray Datasets +* Deprecations + * Deprecated the Instrument kwarg `labels` in favor of `meta_kwargs` and + replaced the `meta_labels` attribute with the `meta_kwargs` attribute + * Deprecated the `labels` keyword arg in favor of `meta_kwargs` in the + netCDF I/O functions and Instrument sub-module + * Deprecated the `malformed_index` kwarg in the test instruments. This is + replaced by `non_monotonic_index` and `non_unique_index` +* Bug Fix + * Allow `pysat.instruments.methods.general.list_files` to handle file + cadences other than daily or monthly + * Allow equality assessments if optional kwargs are used in Instrument + * Fixed an issue with setting single variables in xarray coords (#988) + * Fixed `pysat.Instrument.bounds` to handle all input types for `step` + and `width` regardless of `start` and `stop` time. Also fixed + seasonal bounds specified using file names.
+ * Fixed `pysat.utils.io.apply_table_translation_to_file` check for duplicates + in the meta translation table + * Fixed an issue when passing dates through load_remote_files (#1022) + * Fixed a bug where data may not have any times, but still not be empty + * Fixed a bug where metadata with values of None are assigned as useful + attributes when attaching metadata to xarray objects + * Fixed a bug where a multi_file_day non-monotonic xarray index failed to + merge datasets (#1005) + * Fixed a bug in testing for setting multiple optional load kwargs (#1097) + * Fixed a bug when setting xarray data as a tuple + * Fixed a bug when loading constellations for partially empty instrument lists + * Fixed a bug when cleaning up temporary directories on windows during testing + * Fixed a bug in Instrument loading with a pad, where RangeIndex slicing no + longer works on an empty series +* Maintenance + * Added roadmap to readthedocs + * Improved the documentation in `pysat.utils.files` + * Clarified documentation and tests for name slicing support in pandas + * Clarified documentation for adding new instruments + * Fixed broken links in docs + * Updated docstring header underline lengths and addressed documentation + build errors and warnings + * Additional unit tests for data padding when a data index is non-monotonic. + * Deprecated the `malformed_index` kwarg in the test instruments. This is + replaced by `non_monotonic_index` and `non_unique_index` + * Set the `instruments.pysat_testing` tag='no_download' to return an empty + pandas DataFrame for `load` + * Added `constellations.testing_partial`, which loads a partially empty + constellation dataset + * Reduced default num_samples for constellation test objects + * Improved consistency in metadata for test instruments + [3.0.6] - 2022-12-21 -------------------- * Bug Fix @@ -226,6 +300,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). * Cleaned up excess variables upon import * Removed `data_path` check from `pysat.instruments.methods.general.list_files` * Compatible with netCDF v1.6.0 + * Updated default labels in load_netcdf routines to match core pysat metadata [3.0.1] - 2021-07-28 -------------------- diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index aad5a4bef..4c1a95415 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -13,7 +13,8 @@ are generally held fortnightly. Short version ------------- -* Submit bug reports and feature requests at [GitHub](https://github.com/pysat/pysat/issues) +* Submit bug reports and feature requests at + [GitHub](https://github.com/pysat/pysat/issues) * Make pull requests to the ``develop`` branch @@ -160,3 +161,14 @@ These include: final period * When casting is necessary, use `np.int64` and `np.float64` to ensure operating system agnosticism + + +Ecosystem Style Guidelines +-------------------------- + +If you are creating a new project that you wish to incorporate into the pysat +ecosystem: welcome! We have a +[template repository](https://github.com/pysat/pysatEcosystem_Template) that +contains many of the common documents needed for a new project that you can use +to get started. You may find this helpful when getting started, though this +repository is under active development. diff --git a/docs/api.rst b/docs/api.rst index d4c3a4918..b1768d984 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -39,6 +39,7 @@ Meta .. autoclass:: pysat.Meta :members: + :noindex: .. _api-metalabels: @@ -57,6 +58,7 @@ MetaHeader .. autoclass:: pysat.MetaHeader :members: + :noindex: .. 
_api-orbits: diff --git a/docs/citing.rst b/docs/citing.rst index 0f288f881..0ff303c7f 100644 --- a/docs/citing.rst +++ b/docs/citing.rst @@ -2,10 +2,13 @@ Citations in the pysat ecosystem ================================ When referring to this software package, please cite the original paper by -Stoneback et al [2018] ``_ as well as the -package ``_. Note that this DOI will -always point to the latest version of the code. A list of DOIs for all -versions can be found at the Zenodo page above. +Stoneback et al [2018] ``_ +as well as the package ``_. Note that +this DOI will always point to the latest version of the code. A list of DOIs +for all versions can be found at the Zenodo page above. Depending on +usage, citation of the full ecosystem paper by Stoneback et al [2023] +``_ +may also be appropriate. Example for citation in BibTex for a generalized version: diff --git a/docs/conf.py b/docs/conf.py index ff4ede14b..a64369fc3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -85,7 +85,7 @@ # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. -language = None +language = 'en' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. diff --git a/docs/devel/pysatEcosystem_Template.rst b/docs/devel/pysatEcosystem_Template.rst new file mode 100644 index 000000000..92ddf398d --- /dev/null +++ b/docs/devel/pysatEcosystem_Template.rst @@ -0,0 +1,10 @@ +.. _devel-ecotemp: + +pysatEcosystem_Template +----------------------- + +`pysatEcosystem_Template `_ +is a repository holding common files contained in all pysat ecosystem +repositories. This makes it easier to start a new pysat ecosystem package or +maintain an existing package, as the current project standards will be recorded +here for ease of reference. These standards are currently under development. diff --git a/docs/devel/pysatTutorials.rst b/docs/devel/pysatTutorials.rst new file mode 100644 index 000000000..0cd62d00b --- /dev/null +++ b/docs/devel/pysatTutorials.rst @@ -0,0 +1,8 @@ +.. _devel-tut: + +pysatTutorials +-------------- + +`pysatTutorials `_ acts as a repository +for different Jupyter notebooks with tutorials and examples for scientific +processing and analysis using pysat ecosystem packages. diff --git a/docs/ecosystem.rst b/docs/ecosystem.rst index 60f9679bd..6664e09bc 100644 --- a/docs/ecosystem.rst +++ b/docs/ecosystem.rst @@ -50,3 +50,19 @@ The packages listed below provide useful tools for data analysis. analysis/pysatModels.rst analysis/pysatSeasons.rst analysis/pysatSpaceWeather.rst + + +.. _eco_devel: + +Development Tools +================= + +The packages listed below are repositories for pysat ecosystem tutorials, as +well as development standards and tools. + +.. toctree:: + :maxdepth: 2 + + devel/pysatEcosystem_Template.rst + devel/pysatTutorials.rst + diff --git a/docs/examples.rst b/docs/examples.rst index 13f2a56f8..93f94d9c3 100644 --- a/docs/examples.rst +++ b/docs/examples.rst @@ -6,9 +6,11 @@ Examples The pysat framework reduces the steps needed when performing certain science data investigations. It allows you to focus on developing routines that make your investigation unique, and creating the output data or visualizations you -require. This section provides examples, with the full code for each example -made available in the -`pysatTutorials `_ repository. +require. 
This section provides examples, with the full code for each example, +as well as additional examples, made available in the +`pysatTutorials `_ repository, as +well as the recent `PyHC Summer School tutorials +`_. .. toctree:: :maxdepth: 2 diff --git a/docs/index.rst b/docs/index.rst index f92dd086c..6e8810cea 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -20,5 +20,6 @@ Welcome to pysat's documentation! dependency.rst api.rst contributing.rst + roadmap.rst faq.rst release_notes.rst diff --git a/docs/new_instrument.rst b/docs/new_instrument.rst index bd8c9e7c1..06f39603a 100644 --- a/docs/new_instrument.rst +++ b/docs/new_instrument.rst @@ -3,12 +3,12 @@ Adding a New Instrument ======================= -pysat works by calling modules written for specific instruments -that load and process the data consistent with the pysat standard. The +:py:mod:`pysat` works by calling modules written for specific instruments that +load and process the data consistent with the :py:mod:`pysat` standard. The name of the module corresponds to the combination 'platform_name' provided -when initializing a pysat instrument object. The module should be placed in -the pysat instruments directory or registered (see below) for automatic -discovery. A compatible module may also be supplied directly using +when initializing a :py:mod:`pysat` instrument object. The module should be +placed in the :py:mod:`pysat` instruments directory or registered (see below) +for automatic discovery. A compatible module may also be supplied directly using .. code:: Python @@ -17,12 +17,13 @@ discovery. A compatible module may also be supplied directly using A general template has also been included to make starting any Instrument module easier at :py:mod:`pysat.instruments.templates.template_instrument`. -Some data repositories have pysat templates prepared to assist in integrating a -new instrument. See the associated pysat* package for that particular data -source, such as pysatNASA for supporting additional NASA instruments. +Some data repositories have :py:mod:`pysat` templates prepared to assist in +integrating a new instrument. See the associated pysat* package for that +particular data source, such as :py:mod:`pysatNASA` for supporting additional +NASA instruments. -External modules may be registered as -part of pysat's user instrument registry using the following syntax: +External modules may be registered as part of the :py:mod:`pysat` user +instrument registry using the following syntax: .. code-block:: python @@ -41,18 +42,18 @@ be instantiated with the instrument's platform and name: .. code-block:: python - inst = Instrument('myplatform', 'myname') + inst = pysat.Instrument('myplatform', 'myname') .. _rst_new_inst-libs: Instrument Libraries -------------------- -pysat instruments can reside in external libraries. The registry methods -described above can be used to provide links to these instrument libraries -for rapid access. For instance, pysat instruments which handle the outputs -of geophysical models (such as the TIE-GCM model) reside in the pysatModels -package. +:py:mod:`pysat` instruments can reside in external libraries. The registry +methods described above can be used to provide links to these instrument +libraries for rapid access. For instance, :py:mod:`pysat` instruments that +handle the outputs of geophysical models (such as the TIE-GCM model) reside in +the :py:mod:`pysatModels` package. .. _rst_new_inst-naming: @@ -60,8 +61,8 @@ package. 
Naming Conventions ------------------ -pysat uses a hierarchy of named variables to define each specific data product. -In order this is: +:py:mod:`pysat` uses a hierarchy of named variables to define each specific +data product. In order this is: * platform * name @@ -83,6 +84,14 @@ ICON, JRO, COSMIC, and SuperDARN. Note that this may be a single satellite, a constellation of satellites, a ground-based observatory, or a collaboration of ground-based observatories. +Sometimes it is not practical to set a unique platform name for a data set. An +example of this are many of the space weather indices managed by +:py:mod:`pysatSpaceWeather`. In this case, the solar and geomagnetic indices are +included in a common 'Space Weather' platform (sw), regardless of their origin. +This allows users to access a given index using different :py:attr:`inst_id` +and :py:attr:`tag` values, even if the mission or observatory that produced the +indices differ. + name ^^^^ @@ -107,24 +116,33 @@ identical or similar satellites, or multiple instruments on the same satellite with different look directions. For example, the DMSP satellites carry similar instrument suites across multiple spacecraft. These are labeled as f11-f18. +:py:attr:`inst_id` is also commonly used to distinguish between the same data +product at different sample rates. An example of this may be seen in the +:py:mod:`pysatNASA.instruments.timed_guvi` data for the 'sdr-imaging' and +'sdr-spectrograph' :py:attr:`tag` values. As a rule, when trying to decide if +a characteristic should be assigned as a :py:attr:`tag` or :py:attr:`inst_id` +attribute, the :py:attr:`inst_id` value should subdivide the :py:attr:`tag` +data set in a clear way that does not require a long description. + Naming Requirements in Instrument Module ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Each instrument file must include the platform and name as variables at the top-code-level of the file. Additionally, the tags and inst_ids supported by -the module must be stored as dictionaries. +the module must be stored as dictionaries. Note that all required names should +be lowercase when defined in the instrument module. .. code:: python platform = 'your_platform_name' name = 'name_of_instrument' - # Dictionary keyed by tag with a string description of that dataset + # Dictionary keyed by tag with a string description of that data set tags = {'': 'The standard processing for the data. Loaded by default', 'fancy': 'A higher-level processing of the data.'} # Dictionary keyed by inst_id with a list of supported tags for each key - inst_ids = {'A': ['', 'fancy'], 'B': ['', 'fancy'], 'C': ['']} + inst_ids = {'sat-a': ['', 'fancy'], 'sat-b': ['', 'fancy'], 'sat-c': ['']} Note that the possible tags that can be invoked are '' and 'fancy'. The tags dictionary includes a short description for each of these tags. A blank tag @@ -134,10 +152,15 @@ The supported inst_ids should also be stored in a dictionary. Each key name here points to a list of the possible tags that can be associated with that particular :py:attr:`inst_id`. Note that not all satellites in the example support every level of processing. In this case the 'fancy' processing is -available for satellites A and B, but not C. +available for satellites 'sat-a' and 'sat-b', but not 'sat-c'. -For a dataset that does not need multiple levels of tags and inst_ids, an empty -string can be used. The code below only supports loading a single data set. 
+For a data set that does not need multiple levels of :py:attr:`tag` and +:py:attr:`inst_id` attributes, an empty string can be used. The code below only +supports loading a single data set. However, using an empty string for the +:py:attr:`tag` is discouraged if it is possible for the same platform to have +another distinct version of this data set in the future. This is unlikely to +be an issue for satellite data sets, but should be taken into account for +ground-based platforms. .. code:: python @@ -146,13 +169,14 @@ string can be used. The code below only supports loading a single data set. tags = {'': ''} inst_ids = {'': ['']} -The DMSP IVM (dmsp_ivm) instrument module in pysatMadrigal is a practical -example of a pysat instrument that uses all levels of variable names. An -:ref:`api-instrument-template` is also provided within pysat. +The DMSP IVM (dmsp_ivm) instrument module in :py:mod:`pysatMadrigal` is a +practical example of a :py:mod:`pysat` instrument that uses all levels of +variable names. An :ref:`api-instrument-template` is also provided within +:py:mod:`pysat`. -Note that during instantiation of a :py:class:`pysat.Instrument`, pysat uses -the :py:attr:`tags` and :py:attr:`inst_ids` above to determine if the values -provided by a user are supported by the code. +Note that during instantiation of a :py:class:`pysat.Instrument`, +:py:mod:`pysat` uses the :py:attr:`tags` and :py:attr:`inst_ids` above to +determine if the values provided by a user are supported by the code. .. _rst_new_inst-reqattrs: @@ -170,29 +194,30 @@ they must be defined for every instrument. tags = {'': ''} inst_ids = {'': ['']} -pysat also requires that instruments include information pertaining to +:py:mod:`pysat` also requires that instruments include information pertaining to acknowledgements and references for an instrument. These are simply defined as strings at the instrument level. In the most basic case, these can be defined with the data information at the top. -pysat also requires that a logger handle be defined and instrumentment +:py:mod:`pysat` also requires that a logger handle be defined and instrument information pertaining to acknowledgements and references be included. These ensure that people using the data know who to contact with questions and what they should reference when publishing their results. The logging handle should -be assigned to the pysat logger handle, while the references and acknowedgements -are defined as instrument attributes within the initalization method. +be assigned to the :py:mod:`pysat` logger handle, while the references and +acknowledgments are defined as instrument attributes within the initialization +method. .. code:: python platform = 'your_platform_name' name = 'name_of_instrument' - tags = {'tag1': 'tag1 Descripton', + tags = {'tag1': 'tag1 Description', 'tag2': 'tag2 Description'} inst_ids = {'': [tag for tag in tags.keys()]} def init(self): - """Initializes the Instrument object with instrument specific values. - """ + """Initializes the Instrument object with instrument specific values.""" + self.acknowledgements = ''.join(['Ancillary data provided under ', 'Radchaai grant PS31612.E3353A83']) if self.tag == 'tag1': @@ -209,21 +234,46 @@ are defined as instrument attributes within the initalization method. Required Routines ----------------- -Three methods are required within a new instrument module to support pysat -operations, with functionality to cover finding files, loading data from -specified files, and downloading new files. 
While the methods below are -sufficient to engage with pysat, additional optional methods are needed for -full pysat support. +Three methods are required within a new instrument module to support +:py:mod:`pysat` operations, with functionality to cover finding files, loading +data from specified files, and downloading new files. While the methods below +are sufficient to engage with :py:mod:`pysat`, additional optional methods are +needed for full :py:mod:`pysat` support. + +Note that these methods are not directly invoked by the user, but by +:py:mod:`pysat` as needed in response to user inputs. + + +init +^^^^ + +The instrument :py:meth:`init` method runs once at instrument instantiation, +and handles the acknowledgement of the source of data. Because this is key for +scientific collaboration, acknowledgements and references are required for all +:py:mod:`pysat` instruments. + +.. code:: Python + + def init(self): + """Initializes the Instrument object with instrument specific values.""" + + self.acknowledgements = 'Follow the rules of the road by contacting PI' + self.references = '2001: A Space Odyssey (1968)' + pysat.logger.info(self.acknowledgements) -Note that these methods are not directly invoked by the user, but by pysat -as needed in response to user inputs. + + return + +``self`` is a :py:class:`pysat.Instrument` object. :py:func:`init` should +modify ``self`` in-place as needed; equivalent to a custom routine. It is +expected to attach the :py:attr:`acknowledgements` and :py:attr:`references` +attributes to ``self``. list_files ^^^^^^^^^^ -pysat maintains a list of files to enable data management functionality. To get -this information pysat expects a module function +:py:mod:`pysat` maintains a list of files to enable data management +functionality. To get this information :py:mod:`pysat` expects a module function :py:func:`platform_name.list_files` to return a :py:class:`pandas.Series` of filenames indexed by time with a method signature of: @@ -232,10 +282,11 @@ filenames indexed by time with a method signature of: def list_files(tag='', inst_id='', data_path='', format_str=None): return pandas.Series(files, index=datetime_index) -:py:attr:`inst_id` and :py:attr:`tag` are passed in by pysat to select a -specific subset of the available data. The location on the local filesystem to -search for the files is passed in data_path. The list_files method must return -a :py:class:`pandas.Series` of filenames indexed by datetime objects. +:py:attr:`inst_id` and :py:attr:`tag` are passed in by :py:mod:`pysat` to +select a specific subset of the available data. The location on the local +filesystem to search for the files is passed in ``data_path``. The +:py:meth:`list_files` method must return a :py:class:`pandas.Series` of +filenames indexed by datetime objects. A user must also supply a file template string suitable for locating files on their system at pysat.Instrument instantiation, passed via ``format_str``, @@ -243,8 +294,9 @@ that must be supported. Sometimes users obtain files from non-traditional sources and ``format_str`` makes it easier for those users to use an existing instrument module to work with those files.
-pysat will by default store data in pysat_data_dir/platform/name/tag/inst_id, -helpfully provided in data_path, where pysat_data_dir is specified by using +:py:mod:`pysat` will by default store data in +``pysat_data_dir/platform/name/tag/inst_id``, helpfully provided in +``data_path``, where pysat_data_dir is specified by using ``pysat.params['data_dirs'] = pysat_data_dir``. Note that an alternative directory structure may be specified using the :py:class:`pysat.Instrument` keyword ``directory_format`` at instantiation. The default is recreated using @@ -254,16 +306,17 @@ keyword ``directory_format`` at instantiation. The default is recreated using dformat = '{platform}/{name}/{tag}/{inst_id}' inst=pysat.Instrument(platform, name, directory_format=dformat) -Note that pysat handles the path information thus instrument module developers -do not need to do anything to support the ``directory_format`` keyword. +Note that :py:mod:`pysat` handles the path information thus instrument module +developers do not need to do anything to support the ``directory_format`` +keyword. Pre-Built list_files Methods and Support ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Finding local files is generally similar across data sets thus pysat +Finding local files is generally similar across data sets thus :py:mod:`pysat` includes a variety of methods to make supporting this functionality easier. The simplest way to construct a valid list_files method is to use one of these -included pysat methods. +included :py:mod:`pysat` methods. A complete method is available in :py:func:`pysat.instruments.methods.general.list_files` that may find broad use. @@ -307,27 +360,27 @@ Given the range of compliance of filenames to a strict standard across the decades of space science parsing filenames with and without a ``delimiter`` can typically generate the same results, even for filenames without a consistently applied delimiter. As such either parser will function for most -situations however both remain within pysat to support currently unknown edge -cases that users may encounter. More practically, parsing with a delimiter -offers more support for the ``*`` wildcard than the fixed width parser. -It is generally advised to limit use of the ``*`` wildcard to prevent +situations however both remain within :py:mod:`pysat` to support currently +unknown edge cases that users may encounter. More practically, parsing with a +delimiter offers more support for the ``*`` wildcard than the fixed width +parser. It is generally advised to limit use of the ``*`` wildcard to prevent potential false positives if a directory has more than one instrument within. If the constructor is not appropriate, then lower level methods within :py:class:`pysat.Files` may also be used to reduce the workload in adding a new instrument. Access to the values of user provided template variables is not available via :py:meth:`pysat.Files.from_os` and thus requires use of the -same lower level methods in :py:method:`pysat.utils.files`. +same lower level methods in :py:mod:`pysat.utils.files`. See :py:func:`pysat.utils.time.create_datetime_index` for creating a datetime index for an array of irregularly sampled times. -pysat will invoke the list_files method the first time a particular instrument -is instantiated. After the first instantiation, by default :ref:`tutorial-params`, -pysat will not -search for instrument files as some missions can produce a large number of -files, which may take time to identify. 
The list of files associated -with an Instrument may be updated by adding ``update_files=True`` to the kwargs. +:py:mod:`pysat` will invoke the list_files method the first time a particular +instrument is instantiated. After the first instantiation, by default +:ref:`tutorial-params`, :py:mod:`pysat` will not search for instrument files as +some missions can produce a large number of files, which may take time to +identify. The list of files associated with an Instrument may be updated by +adding ``update_files=True`` to the kwargs. .. code:: python @@ -339,9 +392,9 @@ by calling :py:attr:`inst.files.files`. load ^^^^ -Loading data is a fundamental activity for data science and is -required for all pysat instruments. The work invested by the instrument -module author makes it possible for users to work with the data easily. +Loading data is a fundamental activity for data science and is required for all +:py:mod:`pysat` instruments. The work invested by the instrument module author +makes it possible for users to work with the data easily. The load module method signature should appear as: @@ -351,9 +404,9 @@ The load module method signature should appear as: return data, meta - :py:data:`fnames` contains a list of filenames with the complete data path - that pysat expects the routine to load data for. With most data sets + that :py:mod:`pysat` expects the routine to load data for. With most data sets the method should return the exact data that is within the file. - However, pysat is also currently optimized for working with + However, :py:mod:`pysat` is also currently optimized for working with data by day. This can present some issues for data sets that are stored by month or by year. See :ref:`instruments-sw` for examples of data sets stored by month(s). @@ -394,20 +447,23 @@ The load module method signature should appear as: .. code:: python - # Update units to meters, 'm' for variable - inst.meta[variable, inst.meta.labels.units] = 'm' + # Update units to meters, 'm' for variable `var`, other metadata are set to + # the defaults for this data type and label type + inst.meta[var] = {inst.meta.labels.units: 'm'} - If metadata is already stored with the file, creating the :py:class:`Meta` object is generally trivial. If this isn't the case, it can be tedious to fill out all information if there are many data parameters. In this case it - may be easier to fill out a text file. A basic convenience function is - provided for this situation. See :py:meth:`pysat.Meta.from_csv` for more - information. + may be easier to create a text file, though in many cases a separate function + is defined to provide metadata for specific data types (see + :py:func:`pysatSpaceWeather.instruments.methods.kp_ap.initialize_kp_metadata`). + A basic convenience function is provided if you decide to use a text file. + See :py:meth:`pysat.Meta.from_csv` for more information. download ^^^^^^^^ -Download support significantly lowers the hassle in dealing with any dataset. +Download support significantly lowers the hassle in dealing with any data set. To fetch data from the internet the download method should have the signature .. 
code:: python @@ -417,10 +473,11 @@ To fetch data from the internet the download method should have the signature * :py:data:`date_array`, a list of dates for which data will be downloaded * :py:data:`data_path`, the full path to the directory to store data -* :py:data:`user`, a string for the remote database username -* :py:data:`password`, a string for the remote database password +* :py:data:`user`, an optional string for the remote database username +* :py:data:`password`, an optional string for the remote database password -The routine should download the data and write it to the disk at the data_path. +The routine should download the data and write it to the disk at the location +provided by 'data_path', which will be supplied by :py:mod:`pysat`. .. _rst_new_inst-optattr: @@ -435,9 +492,10 @@ directory_format ^^^^^^^^^^^^^^^^ Allows the specification of a custom directory naming structure, where the files -for this Instrument will be stored within the pysat data directory. If not set -or if set to ``None``, it defaults to ``os.path.join('{platform}', '{name}', '{tag}', '{inst_id}')``. -The string format understands the keys :py:data:`platform`, :py:data:`name`, +for this Instrument will be stored within the :py:mod:`pysat` data directory. +If not set or if set to ``None``, it defaults to +``os.path.join('{platform}', '{name}', '{tag}', '{inst_id}')``. The string +format understands the keys :py:data:`platform`, :py:data:`name`, :py:data:`tag`, and :py:data:`inst_id`. This may also be a function that takes :py:data:`tag` and :py:data:`inst_id` as input parameters and returns an appropriate string. @@ -472,7 +530,7 @@ pandas_format This defaults to ``True`` and assumes the data are organized as a time series, allowing them to be stored as a :py:class:`pandas.DataFrame`. Setting this -attribute to ``False`` tells pysat that the data will be stored in an +attribute to ``False`` tells :py:mod:`pysat` that the data will be stored in an :py:class:`xarray.Dataset`. @@ -484,7 +542,7 @@ Optional Routines and Support Custom Keywords in Support Methods ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -If provided, pysat supports the definition and use of keywords for an +If provided, :py:mod:`pysat` supports the definition and use of keywords for an instrument module so that users may define their preferred default values. A custom keyword for an instrument module must be defined in each function that will receive that keyword argument if provided by the user. All instrument @@ -502,13 +560,14 @@ arguments. return data, meta If a user provides :py:data:`custom1` or :py:data:`custom2` at instantiation, -then pysat will pass those custom keyword arguments to :py:func:`load` with -every call. All user provided custom keywords are copied into the -Instrument object itself under :py:attr:`inst.kwargs` for use in other areas. -All available keywords, including default values, are also grouped by relevant -function in a dictionary, :py:attr:`inst.kwargs_supported`, attached to the -:py:class:`Instrument` object. Updates to values in :py:attr:`inst.kwargs` will -be propagated to the relevant function the next time that function is invoked. +then :py:mod:`pysat` will pass those custom keyword arguments to :py:func:`load` +with every call. All user provided custom keywords are copied into the +:py:class:`~pysat.Instrument` object itself under :py:attr:`inst.kwargs` for +use in other areas. 
All available keywords, including default values, are also +grouped by relevant function in a dictionary, :py:attr:`inst.kwargs_supported`, +attached to the :py:class:`Instrument` object. Updates to values in +:py:attr:`inst.kwargs` will be propagated to the relevant function the next +time that function is invoked. .. code:: python @@ -523,31 +582,12 @@ be propagated to the relevant function the next time that function is invoked. # Show keywords reserved for use by pysat print(inst.kwargs_reserved) -If a user supplies a keyword that is reserved or not supported by pysat, or by -any specific instrument module function, then an error is raised. Reserved -keywords are :py:data:`fnames`, :py:data:`inst_id`, :py:data:`tag`, -:py:data:`date_array`, :py:data:`data_path`, :py:data:`format_str`, -:py:data:`supported_tags`, :py:data:`start`, :py:data:`stop`, and -:py:data:`freq`. - - -init -^^^^ - -If present, the instrument init method runs once at instrument instantiation. - -.. code:: python - - def init(self): - self.acknowledgements = 'Thanks to Hal, for keeping me alive' - self.references = '2001: A Space Oddessy (1968)' - return - -``self`` is a :py:class:`pysat.Instrument` object. :py:func:`init` should modify -``self`` in-place as needed; equivalent to a custom routine. It is expected to -attach the :py:attr:`acknowledgements` and :py:attr:`references` attributes -to ``self``. - +If a user supplies a keyword that is reserved or not supported by +:py:mod:`pysat`, or by any specific instrument module function, then an error is +raised. Reserved keywords are :py:data:`fnames`, :py:data:`inst_id`, +:py:data:`tag`, :py:data:`date_array`, :py:data:`data_path`, +:py:data:`format_str`, :py:data:`supported_tags`, :py:data:`start`, +:py:data:`stop`, and :py:data:`freq`. preprocess ^^^^^^^^^^ @@ -577,8 +617,8 @@ Cleans instrument for levels supplied in inst.clean_level. def clean(self): return -``self`` is a :py:class:`pysat.Instrument` object. :py:func:`clean` should modify -``self`` in-place as needed; equivalent to a custom routine. +``self`` is a :py:class:`pysat.Instrument` object. :py:func:`clean` should +modify ``self`` in-place as needed; equivalent to a custom routine. list_remote_files ^^^^^^^^^^^^^^^^^ @@ -586,17 +626,17 @@ list_remote_files Returns a list of available files on the remote server. This method is required for the Instrument module to support the :py:meth:`download_updated_files` method, which makes it trivial for users to ensure they always have the most up -to date data. pysat developers highly encourage the development of this method, -when possible. +to date data. :py:mod:`pysat` developers highly encourage the development of +this method, when possible. .. code:: python def list_remote_files(tag='', inst_id='', start=None, stop=None, ...): return list_like -This method is called by several internal pysat functions, and can be directly -called by the user through the :py:meth:`inst.remote_file_list` method. The -user can search for subsets of files through optional keywords, such as: +This method is called by several internal :py:mod:`pysat` functions, and can be +directly called by the user through the :py:meth:`inst.remote_file_list` method. +The user can search for subsets of files through optional keywords, such as: .. code:: python @@ -607,8 +647,8 @@ user can search for subsets of files through optional keywords, such as: Logging ------- -pysat is connected to the Python logging module. 
This allows users to set -the desired level of direct feedback, as well as where feedback statements +:py:mod:`pysat` is connected to the Python logging module. This allows users to +set the desired level of direct feedback, as well as where feedback statements are delivered. The following line in each module is encouraged at the top-level so that the instrument module can provide feedback using the same mechanism @@ -633,14 +673,14 @@ will direct information, warnings, and debug statements appropriately. Testing Support =============== All modules defined in the ``__init__.py`` for pysat/instruments are -automatically tested when pysat code is tested. To support testing all of the -required routines, additional information is required by pysat. +automatically tested when :py:mod:`pysat` code is tested. To support testing all +of the required routines, additional information is required by :py:mod:`pysat`. -Below is example code from the pysatMadrigal Instrument module, dmsp_ivm.py. The -attributes are set at the top level simply by defining variable names with the -proper info. The various satellites within DMSP, F11, F12, F13 are separated -out using the inst_id parameter. 'utd' is used as a tag to delineate that the -data contains the UTD developed quality flags. +Below is example code from the :py:mod:`pysatMadrigal` Instrument module, +dmsp_ivm.py. The attributes are set at the top level simply by defining +variable names with the proper info. The various satellites within DMSP, F11, +F12, F13 are separated out using the inst_id parameter. 'utd' is used as a tag +to delineate that the data contains the UTD developed quality flags. .. code:: python @@ -682,12 +722,14 @@ will not be tested. The leading underscore in :py:attr:`_test_dates` ensures that this information is not added to the instrument's meta attributes, so it will not be present in Input/Output operations. -The standardized pysat tests are available in pysat.tests.instrument_test_class. -The test collection test_instruments.py imports this class, collects a list of -all available instruments (including potential :py:data:`tag`/:py:data:`inst_id` -combinations), and runs the tests using pytestmark. By default, pysat assumes -that your instrument has a fully functional download routine, and will run an -end-to-end test. If this is not the case, see the next section. +The standardized :py:mod:`pysat` tests are available in +:py:mod:`pysat.tests.instrument_test_class`. The test collection in +test_instruments.py imports this class, collects a list of all available +instruments (including potential :py:data:`tag`/:py:data:`inst_id` +combinations), and runs the tests using pytestmark. By default, +:py:mod:`pysat` assumes that your instrument has a fully functional download +routine, and will run an end-to-end test. If this is not the case, see the next +section. .. _rst_test-special: @@ -704,8 +746,8 @@ locally generated. To let the test routines know this is the case, the :py:attr:`_test_download` flag is used. This flag uses the same dictionary structure as :py:attr:`_test_dates`. -For instance, say we have an instrument team that wants to use pysat to -manage their data products. Level 1 data is locally generated by the team, +For instance, say we have an instrument team that wants to use :py:mod:`pysat` +to manage their data products. Level 1 data is locally generated by the team, and Level 2 data is provided to a public repository. 
The instrument should be set up as follows: @@ -735,21 +777,42 @@ and supporting methods may be found at :py:mod:`pysat.instruments.methods`. General ^^^^^^^ -A general instrument template is included with pysat, +A general instrument template is included with :py:mod:`pysat`, :py:mod:`pysat.instruments.templates.template_instrument`, that has the full set of required and optional methods, and docstrings, that may be used as a starting -point for adding a new instrument to pysat. +point for adding a new instrument to :py:mod:`pysat`. Note that there are general supporting methods for adding an Instrument. See :ref:`api-methods-general` for more. -This tells the test routines to skip the download / load tests for Level 1 data. -Instead, the download function for this flag will be tested to see if it has an -appropriate user warning that downloads are not available. +This tells the test routines to skip the download and load tests for Level 1 +data. Instead, the download function for this flag will be tested to see if it +has an appropriate user warning that downloads are not available. + +Note that :py:mod:`pysat` assumes that this flag is True if no variable is +present. Thus, specifying only ``_test_download = {'': {'Level_1': False}}`` +has the same effect, and Level 2 tests will still be run. + +Load Options +^^^^^^^^^^^^ + +As there may be different ways to load data using custom keyword arguments, the +:py:attr:`_test_load_opt` attribute can be used to support testing of each +custom keyword argument option. These should be included as a list that is +accessed through a dictionary with :py:attr:`inst_id` and :py:attr:`tag` keys. + +.. code:: python + + platform = 'observatory' + name = 'data' + tags = {'historic': 'Historic data', + 'newfangled': 'Newfangled data, has different formatting options'} + inst_ids = {'': ['historic', 'newfangled']} + _test_dates = {'': {'historic': dt.datetime(1900, 1, 1), + 'newfangled': dt.datetime(2000, 1, 1)}} + _test_load_opt = {'': {'newfangled': [{'historic_format': True}, + {'historic_format': False}]}} -Note that pysat assumes that this flag is True if no variable is present. -Thus specifying only ``_test_download = {'': {'Level_1': False}}`` has the -same effect, and Level 2 tests will still be run. FTP Access ^^^^^^^^^^ @@ -795,24 +858,3 @@ present. _test_dates = {'': {'Level_1': dt.datetime(2020, 1, 1), 'Level_2': dt.datetime(2020, 1, 1)}} _password_req = {'': {'Level_1': False}} - - -.. _rst_test-ackn: - -Data Acknowledgements ---------------------- - -Acknowledging the source of data is key for scientific collaboration. This can -generally be put in the :py:func:`init` function of each instrument. - -.. code:: Python - - def init(self): - """Initializes the Instrument object with instrument specific values. - """ - - self.acknowledgements = acknowledgements_string - self.references = references_string - pysat.logger.info(self.acknowledgements) - - return diff --git a/docs/roadmap.rst b/docs/roadmap.rst new file mode 100644 index 000000000..673aee579 --- /dev/null +++ b/docs/roadmap.rst @@ -0,0 +1,123 @@ +.. _roadmap: + +Roadmap +======= + +The long-term vision for :py:mod:`pysat` is to make the package suitable for +working with any combination of data. As :py:mod:`pysat` is intended to support +the development of highly robust and verifiable scientific analysis and +processing packages, :py:mod:`pysat` must produce each of its features with high +quality unit tests and documentation. 
+ +This document provides a broad and long term vision of :py:mod:`pysat`. Specific +tasks associated with this roadmap may be found within the posted +`Issues `_ and +`Projects `_. + +An item being on the roadmap does not necessarily mean that it will happen. +During the implementation or testing periods we may discover issues that limit +the feature. + +Generality +---------- +Data support with :py:mod:`pysat` is currently focused on space-science data +sets. However, the features within the module also work well on other types of +data. Where appropriate, space-science specific features will be generalized for +a wider audience. + +Data Support +------------ +The :py:class:`Instrument` class currently supports both +:py:class:`pandas.DataFrame` and :py:class:`xarray.Dataset` formats, covering +1D time-series and multi-dimensional data that can be loaded into memory. Even +larger data sets would require that :py:mod:`pysat` integrate a data format such +as Dask. To cover the needs of any potential user, an ideal solution would be +for :py:mod:`pysat` to implement a clear public mechanism for users to add their +own data formats. Commonalities observed after integrating :py:mod:`dask`, +:py:mod:`pandas`, and :py:mod:`xarray` should provide a viable path forward for +this generalization. + +Multiple Data Sources +--------------------- +The Instrument class is designed to work on a single data source at a time. For +multiple data sources :py:mod:`pysat` is developing a Constellation class that +operates on multiple Instrument objects and will include methods designed to +assist in merging multiple data sets together. The Constellation class will +feature compatibility with the simpler Instrument object when possible. However, +given the additional complexity when working with multiple sources this may not +always be possible. Long term, we intend to provide functionality that can +merge a Constellation into a 'live' Instrument object for greatest compatibility. + +Metapackage +----------- +The minimal barriers to entry in open source software allow for a large +variety of packages, each with its own approach to a problem. A disadvantage +of this setup is that many of these packages have been developed without +interoperability in mind, presenting challenges when attempting to combine +these disparate packages towards a common goal. :py:mod:`pysat` provides a +versatility when coupling to data sources, which may be used to connect these +isolated packages together. Once a package is connected to :py:mod:`pysat` +then that functionality becomes available to all packages that incorporate +:py:mod:`pysat` as a source. The value and functionality of this large scale +:py:mod:`pysat` metapackage increase exponentially with every new connection. + +File Support +------------ +:py:mod:`pysat` currently supports tracking both data and metadata, as well as +the ability to create netCDF4 files, and is capable of maintaining compliance +with NASA's +`Space Physics Data Facility `_ +(SPDF) formatting requirements for NASA satellite missions. Support for creating +different types of files, as well as a variety of file standards, needs to be +enhanced to support a broader array of research areas. + +Data Iteration +-------------- +:py:mod:`pysat` currently features orbit iteration, a feature that transparently +provides complete orbits (across day/file breaks) calculated in real time.
A +variety of orbit types are supported, each of which maps to a method looking for +a particular signal in the data to trigger upon. However, the current variety of +orbit types is insufficient to address community needs. The underlying class is +capable of iterating over a wider variety of conditions, though this type of +functionality is not currently available to users. Improving access to this +area enables generalized real-time data pagination based upon custom user +supplied conditions. Ensuring good performance under a variety of conditions +requires upgrading and generalizing the data caching in :py:mod:`pysat` as well +as the orbit iteration interface. + +Performance +----------- +While it is critical for scientific outputs to be correct, results that are +equally correct but calculated quicker make it easier for scientists to fully +explore a data set. A benchmarking solution will be implemented and used to +identify areas with slow performance that could potentially be improved upon. + +Testing +------- +Unit tests confirming :py:mod:`pysat` behaves as expected are fundamental to the +scientific goals of the project. While unit test coverage is high, a general +review of all the unit tests needs to be performed. In particular, unit tests +written early in the project need to be brought up to project standards. The +test suite needs additional organization as many files are too long. Further, +tests need to be expanded to ensure that more combinations of features are +engaged at once to ensure interoperability. + +User Experience +--------------- +Providing a consistent, versatile, and easy to use interface is a core feature +for :py:mod:`pysat`. + +Documentation +------------- +Robust, accurate, consistent, comprehensive, and easy to understand +documentation is essential for any project presented to the community to build +upon. While great strides were made with the release of :py:mod:`pysat` v3.0, +additional review and expansion of examples and discussion would be helpful to +users. + +pysatPenumbra Modules +--------------------- +The development of analysis packages built on :py:mod:`pysat` has historically +revealed areas for improvement. Active engagement with these publicly developed +packages helps ensure that solutions are practical and responsive to community +requirements. diff --git a/docs/tutorial/tutorial_basics.rst b/docs/tutorial/tutorial_basics.rst index 0654bf17a..63fcb1bdb 100644 --- a/docs/tutorial/tutorial_basics.rst +++ b/docs/tutorial/tutorial_basics.rst @@ -324,7 +324,7 @@ the data at the :py:class:`pysat.Instrument` level that behaves the same whether # Convenient data assignment dmsp['ti'] = new_array - # Convenient data broadcasting assignment, sets a single value to all times + # Data broadcasting assignment for new variables, sets a single value to all times dmsp['ti'] = single_value # Assignment through index slicing @@ -333,7 +333,6 @@ the data at the :py:class:`pysat.Instrument` level that behaves the same whether # Assignment through datetime slicing dmsp[start:stop, 'ti'] = sub_array - Note that :py:func:`np.where` may be used to select a subset of data using either the convenient access or standard pandas or xarray selection methods.
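As a concrete sketch of the :py:func:`np.where` selection noted above, the snippet below pairs it with the convenient access syntax. It is illustrative only: it assumes the bundled 'pysat', 'testing' instrument, that pysat's data directories are already configured, and that the 'slt' and 'mlt' variables are present; none of these names come from the diff itself.

.. code:: python

    import numpy as np
    import pysat

    # Instantiate the bundled test instrument and load one day of simulated data
    inst = pysat.Instrument('pysat', 'testing')
    inst.load(2009, 1)

    # Indices where solar local time falls in the afternoon sector
    idx, = np.where(inst['slt'] > 12.0)

    # Convenient access with an index and a variable name returns the subset
    afternoon_mlt = inst[idx, 'mlt']

    # The same subset through the underlying pandas DataFrame
    afternoon_mlt_pandas = inst.data['mlt'].iloc[idx]

Both selections should return the same values; the convenient access form mirrors the slicing examples in the hunk above, while the last line works through the pandas backend directly.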
diff --git a/docs/tutorial/tutorial_constellation.rst b/docs/tutorial/tutorial_constellation.rst index 841426836..c7ce72a89 100644 --- a/docs/tutorial/tutorial_constellation.rst +++ b/docs/tutorial/tutorial_constellation.rst @@ -8,6 +8,8 @@ The :py:class:`pysat.Constellation` class is an alternative to the :py:class:`pysat.Instrument` objects and allows quicker processing and analysis on multiple data sets. +.. _tutorial-const-init: + Initialization -------------- @@ -47,3 +49,194 @@ The :py:attr:`~pysat._instrument.Instrument.name` values are: :py:data:`epam`, :py:data:`mag`, :py:data:`sis`, and :py:data:`swepam`. The only :py:class:`~pysat._instrument.Instrument` defining attributes that are not unique are the :py:attr:`~pysat._instrument.Instrument.name` values. + +Use a Constellation sub-module +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Some pysatEcosystem packages, such as :py:mod:`pysatNASA`, contain sub-modules +that specify commonly-used Constellations. This example uses the +`DE2 Instruments `_ +to create a Constellation of Dynamics Explorer 2 LANG, NACS, RPA, and WATS +instruments. + +.. code:: python + + import pysat + import pysatNASA + + # Initialize the DE2 Constellation using the DE2 constellation module + de2 = pysat.Constellation(const_module=pysatNASA.constellations.de2) + + # Display the results + print(de2) + +The last command will show that nine :py:class:`~pysat._instrument.Instrument` +objects were loaded by the module. + + +.. _tutorial-const-shared: + +Properties shared with Instruments +---------------------------------- + +Just as with a :py:class:`~pysat._instrument.Instrument` object, a +:py:class:`~pysat._constellation.Constellation` object will download and load +data using the :py:meth:`~pysat._constellation.Constellation.download` and +:py:meth:`~pysat._constellation.Constellation.load` methods. + +.. code:: python + + # Download today's data and load it into the Constellation + ace_rt.download() + ace_rt.load(date=ace_rt.today()) + +This will download data for all :py:class:`~pysat._constellation.Constellation` +:py:class:`~pysat._instrument.Instrument` objects into their appropriate +directories and then load the data for each +:py:class:`~pysat._instrument.Instrument`. + +Other :py:class:`~pysat._instrument.Instrument` properties and methods, such as +the loaded date, list of variables, custom function methods, and bounds behave +the same for the :py:class:`~pysat._constellation.Constellation` object. + + +.. _tutorial-const-unique: + +Properties differing from Instruments +------------------------------------- +:py:class:`~pysat._constellation.Constellation` also contains attributes that +are unique to this object or differ slightly from their +:py:class:`~pysat._instrument.Instrument` counterparts due to differing needs. + +Time index +^^^^^^^^^^ +For example, there is an :py:attr:`~pysat._constellation.Constellation.index` +attribute, but as this must represent times for all the desired +:py:class:`~pysat._instrument.Instrument` objects, this may not exactly match +the individual :py:attr:`~pysat._instrument.Instrument.index` objects. There +are two additional attributes that inform how this time index is constructed: +:py:attr:`~pysat._constellation.Constellation.common_index` and +:py:attr:`~pysat._constellation.Constellation.index_res`. If +:py:attr:`~pysat._constellation.Constellation.common_index` is ``True``, +only times present in all :py:class:`~pysat._instrument.Instrument` objects +are included in :py:attr:`~pysat._constellation.Constellation.index`.
If +:py:attr:`~pysat._constellation.Constellation.common_index` is ``False``, +the maximum time range is used instead. +:py:attr:`~pysat._constellation.Constellation.index_res` provides the +:py:attr:`~pysat._constellation.Constellation.index` resolution, if it is not +``None``. If it is ``None``, then an appropriate resolution is established from +the individual :py:attr:`~pysat._instrument.Instrument.index` objects. It is +standard to have :py:attr:`~pysat._constellation.Constellation.common_index` be +``True`` and :py:attr:`~pysat._constellation.Constellation.index_res` set to +``None``. + +Empty flags +^^^^^^^^^^^ +A :py:class:`~pysat._constellation.Constellation` has more states of having +data loaded than merely ``True`` or ``False``; it is possible for only some of +the desired :py:class:`~pysat._instrument.Instrument` objects to have data. +To address this issue, there are two +:py:class:`~pysat._constellation.Constellation` attributes that address the +presence of loaded data: :py:attr:`~pysat._constellation.Constellation.empty` +and :py:attr:`~pysat._constellation.Constellation.empty_partial`. If ``True``, +:py:attr:`~pysat._constellation.Constellation.empty` indicates that no data +is loaded. If :py:attr:`~pysat._constellation.Constellation.empty_partial` +is ``True`` and :py:attr:`~pysat._constellation.Constellation.empty` is +``False``, some data is loaded. If both +:py:attr:`~pysat._constellation.Constellation.empty_partial` and +:py:attr:`~pysat._constellation.Constellation.empty` are ``False``, then all +:py:class:`~pysat._instrument.Instrument` objects have data. + +Instrument access +^^^^^^^^^^^^^^^^^ +You can access all the standard :py:class:`~pysat._instrument.Instrument` +attributes through the +:py:attr:`~pysat._constellation.Constellation.instruments` attribute. + +.. code:: python + + # Cycle through each ACE Real Time Instrument and print the most recent + # filename + for i, inst in enumerate(ace_rt.instruments): + print(ace_rt.names[i], inst.files.files[-1]) + +This should yield a list of ACE :py:attr:`~pysat._instrument.Instrument.name` +attributes and their files with the current or tomorrow's date. List attributes +that provide information about the individual +:py:class:`~pysat._constellation.Constellation` +:py:class:`~pysat._instrument.Instrument` objects include: +:py:attr:`~pysat._constellation.Constellation.platforms`, +:py:attr:`~pysat._constellation.Constellation.names`, +:py:attr:`~pysat._constellation.Constellation.tags`, and +:py:attr:`~pysat._constellation.Constellation.inst_ids`. + +.. _tutorial-const-to-inst: + +Converting to an Instrument +--------------------------- + +At a certain point in your data analysis, it may be desirable to convert your +:py:class:`~pysat._constellation.Constellation` into an +:py:class:`~pysat._instrument.Instrument`. This functionality is supported by +the class method :py:meth:`~pysat._constellation.Constellation.to_inst`. Let +us use the ACE realtime data Constellation in this example. + +.. 
code:: python
+
+   # Convert the output to an Instrument
+   rt_inst = ace_rt.to_inst()
+   print(rt_inst)
+
+This yields:
+::
+
+   pysat Instrument object
+   -----------------------
+   Platform: 'ace'
+   Name: 'swepam_mag_epam_sis'
+   Tag: 'realtime'
+   Instrument id: ''
+
+   Data Processing
+   ---------------
+   Cleaning Level: 'clean'
+   Data Padding: None
+   Keyword Arguments Passed to list_files: {}
+   Keyword Arguments Passed to load: {}
+   Keyword Arguments Passed to preprocess: {}
+   Keyword Arguments Passed to download: {}
+   Keyword Arguments Passed to list_remote_files: {}
+   Keyword Arguments Passed to clean: {}
+   Keyword Arguments Passed to init: {}
+   Custom Functions: 0 applied
+
+   Local File Statistics
+   ---------------------
+   Number of files: 0
+
+
+   Loaded Data Statistics
+   ----------------------
+   Date: 09 January 2023
+   DOY: 009
+   Time range: 09 January 2023 15:15:00 --- 09 January 2023 16:45:00
+   Number of Times: 91
+   Number of variables: 33
+
+   Variable Names:
+   jd_ace_epam_realtime sec_ace_epam_realtime status_e
+   ...
+   sw_proton_dens sw_bulk_speed sw_ion_temp
+
+   pysat Meta object
+   -----------------
+   Tracking 7 metadata values
+   Metadata for 33 standard variables
+   Metadata for 0 ND variables
+   Metadata for 0 global attributes
+
+Currently, if you wish to save your modified
+:py:class:`~pysat._constellation.Constellation` data to a NetCDF file, you must
+first convert it to an :py:class:`~pysat._instrument.Instrument` using
+:py:meth:`~pysat._constellation.Constellation.to_inst`. From there, you may
+use :py:meth:`~pysat._instrument.Instrument.to_netcdf4` to create a NetCDF file.
diff --git a/docs/tutorial/tutorial_custom.rst b/docs/tutorial/tutorial_custom.rst
index 0fea3ed3b..6813501d7 100644
--- a/docs/tutorial/tutorial_custom.rst
+++ b/docs/tutorial/tutorial_custom.rst
@@ -110,8 +110,8 @@ the desired :py:attr:`instruments` using the :py:attr:`platform`,
 Attaching Custom Function to an Instrument
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Custom functions must be attached to an :py:class:`Instrument` object for pysat
-to automatically apply the function upon every load.
+Custom functions must be attached to an :py:class:`Instrument` object for
+:py:mod:`pysat` to automatically apply the function upon every load.
 
 .. code:: python
 
@@ -212,8 +212,8 @@ functionality. Note the same result is obtained. The DMSP
 :py:class:`Instrument` object and analysis are performed at the same
 level, so there is no strict gain by using
-the pysat nano-kernel in this simple demonstration. However, we can use the
-nano-kernel to translate this daily mean into an versatile
+the :py:mod:`pysat` nano-kernel in this simple demonstration. However, we can
+use the nano-kernel to translate this daily mean into a versatile
 instrument-independent function.
 
 
@@ -245,10 +245,10 @@ Attaching Custom Function to a Constellation
 
 Attaching custom functions to :py:class:`Constellation` objects is done in the
 same way as for :py:class:`Instrument` objects. The only difference is the
-additional keyword argument :py:var:`apply_inst`, which defaults to
-:py:value:`True` and applies the custom function to all of the
-:py:class:`Constellation` :py:class:`Instrument` objects. This example assumes
-that the :py:mod:`pysatSpaceWeather` ACE Instruments have been registered.
+additional keyword argument ``apply_inst``, which defaults to ``True`` and
+applies the custom function to all of the :py:class:`Constellation`
+:py:class:`Instrument` objects. 
This example assumes that the +:py:mod:`pysatSpaceWeather` ACE Instruments have been registered. .. code:: python diff --git a/docs/tutorial/tutorial_iteration.rst b/docs/tutorial/tutorial_iteration.rst index c4360ee68..ed3414b87 100644 --- a/docs/tutorial/tutorial_iteration.rst +++ b/docs/tutorial/tutorial_iteration.rst @@ -3,15 +3,15 @@ Iteration --------- -pysat supports iterative loading of data at daily, orbital, and custom -cadances. The examples below show you how this works and how to specify the +:py:mod:`pysat` supports iterative loading of data at daily, orbital, and custom +cadences. The examples below show you how this works and how to specify the loading limits. - Daily Iteration ^^^^^^^^^^^^^^^ -By default, pysat will iteratively load data at a daily cadance. +By default, :py:mod:`pysat` will iteratively load data at the instrument's file +cadence. If this is not specified, :py:mod:`pysat` will assume a daily cadence. .. code:: python @@ -94,7 +94,7 @@ Orbit Iteration ^^^^^^^^^^^^^^^ You can iterate by orbit as well as day. To do this, be sure to specify what -type of orbit pysat should use. +type of orbit :py:mod:`pysat` should use. .. code:: python @@ -168,8 +168,8 @@ Now, the output is: Maximum meridional magnetic perturbation: 21.67 nT on 05 Jan 2010 -pysat iteration also supports loading more than a single day/file of data -at a time as well as stepping through the data in daily increments larger +:py:mod:`pysat` iteration also supports loading more than a single day/file of +data at a time as well as stepping through the data in daily increments larger than a single day. Assignment of the data step size and width is also set via the bounds attribute. diff --git a/docs/tutorial/tutorial_verbosity.rst b/docs/tutorial/tutorial_verbosity.rst index 7ca17f789..b8715e6f1 100644 --- a/docs/tutorial/tutorial_verbosity.rst +++ b/docs/tutorial/tutorial_verbosity.rst @@ -3,7 +3,7 @@ Verbosity --------- -pysat uses Python's standard +:py:mod:`pysat` uses Python's standard `logging tools `_ to control the verbosity of output. By default only logger.warning messages are shown. For more detailed output you may change the logging level. @@ -14,7 +14,7 @@ are shown. For more detailed output you may change the logging level. import pysat pysat.logger.set_level(logging.INFO) -The logging level will be applied to all :py:class:`Instrument` data loaded by -pysat and to analysis tools run by the pysat penumbra packages. If you try to -update the logger level after doing anything with pysat in an interactive -session, it may not work. +The logging level will be applied to all :py:class:`~pysat._instrument.Instrument` data +loaded by :py:mod:`pysat` and to analysis tools run by the pysat penumbra +packages. If you try to update the logger level after doing anything with +:py:mod:`pysat` in an interactive session, it may not work. 
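A minimal sketch of the multi-day stepping described in the iteration tutorial
above, assuming the built-in 'pysat' 'testing' instrument and illustrative 2009
dates; the two-day step and width values are examples only, not part of the
changes in this patch:

.. code:: python

   import datetime as dt
   import pysat

   # Use the built-in test instrument; any registered instrument behaves
   # the same way
   inst = pysat.Instrument('pysat', 'testing', use_header=True)

   # Iterate in two-day steps, loading two days of data on each pass
   start = dt.datetime(2009, 1, 1)
   stop = dt.datetime(2009, 1, 10)
   inst.bounds = (start, stop, '2D', dt.timedelta(days=2))

   for loaded_inst in inst:
       print(loaded_inst.date, len(loaded_inst.index))
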
diff --git a/pysat/_constellation.py b/pysat/_constellation.py index 1295a682f..0252c7275 100644 --- a/pysat/_constellation.py +++ b/pysat/_constellation.py @@ -15,8 +15,10 @@ import datetime as dt import numpy as np import pandas as pds +import xarray as xr import pysat +from pysat.utils.coords import establish_common_coord class Constellation(object): @@ -424,41 +426,42 @@ def _index(self): out_res = None for inst in self.instruments: - if stime is None: - # Initialize the start and stop time - stime = inst.index[0] - etime = inst.index[-1] - - # If desired, determine the resolution - if self.index_res is None: - if inst.index.freq is None: - out_res = pysat.utils.time.calc_res(inst.index) - else: - out_res = pysat.utils.time.freq_to_res( - inst.index.freq) - else: - # Adjust the start and stop time as appropriate - if self.common_index: - if stime < inst.index[0]: - stime = inst.index[0] - if etime > inst.index[-1]: - etime = inst.index[-1] + if len(inst.index) > 0: + if stime is None: + # Initialize the start and stop time + stime = inst.index[0] + etime = inst.index[-1] + + # If desired, determine the resolution + if self.index_res is None: + if inst.index.freq is None: + out_res = pysat.utils.time.calc_res(inst.index) + else: + out_res = pysat.utils.time.freq_to_res( + inst.index.freq) else: - if stime > inst.index[0]: - stime = inst.index[0] - if etime < inst.index[-1]: - etime = inst.index[-1] - - # If desired, determine the resolution - if self.index_res is None: - if inst.index.freq is None: - new_res = pysat.utils.time.calc_res(inst.index) + # Adjust the start and stop time as appropriate + if self.common_index: + if stime < inst.index[0]: + stime = inst.index[0] + if etime > inst.index[-1]: + etime = inst.index[-1] else: - new_res = pysat.utils.time.freq_to_res( - inst.index.freq) - - if new_res < out_res: - out_res = new_res + if stime > inst.index[0]: + stime = inst.index[0] + if etime < inst.index[-1]: + etime = inst.index[-1] + + # If desired, determine the resolution + if self.index_res is None: + if inst.index.freq is None: + new_res = pysat.utils.time.calc_res(inst.index) + else: + new_res = pysat.utils.time.freq_to_res( + inst.index.freq) + + if new_res < out_res: + out_res = new_res # If a resolution in seconds was supplied, calculate the frequency if self.index_res is not None: @@ -619,6 +622,206 @@ def index(self): return self._index() + def to_inst(self, common_coord=True, fill_method=None): + """Combine Constellation data into an Instrument. + + Parameters + ---------- + common_coord : bool + For Constellations with any xarray.Dataset Instruments, True to + include locations where all coordinate arrays cover, False to use + the maximum location range from the list of coordinates + (default=True) + fill_method : str or NoneType + Fill method if common data coordinates do not match exactly. If + one of 'nearest', 'pad'/'ffill', 'backfill'/'bfill', or None then + no interpolation will occur. If 'linear', 'zero', 'slinear', + 'quadratic', 'cubic', or 'polynomial' are used, then 1D or ND + interpolation will be used. (default=None) + + Returns + ------- + inst : pysat.Instrument + A pysat Instrument containing all data from the constellation at a + common time index + + Note + ---- + Uses the common index, `self.index`, that was defined using information + from the Constellation Instruments in combination with a potential + user-supplied resolution defined through `self.index_res`. 
+ + """ + fill_methods = ['nearest', 'pad', 'ffill', 'backfill', 'bfill'] + interp_type = [np.float64, np.int64, float, int] + + # Establish the desired time index + coords = {'time': self.index} + fill_coords = {'time': coords['time']} + + # Initalize the pysat Instrument + inst = pysat.Instrument() + inst._assign_attrs_from_const(self) + + # Initalize the common data object + if inst.pandas_format: + data = pds.DataFrame(data={}, index=coords['time']) + else: + # Get the common coordinates needed for all data + for cinst in self.instruments: + if not cinst.pandas_format: + for new_coord in cinst.data.coords.keys(): + if new_coord not in coords.keys(): + coords[new_coord] = cinst.data.coords[new_coord] + elif new_coord != 'time': + # Two instruments have the same coordinate, if they + # are not identical, we need to establish a common + # range and resolution. Note that this will only + # happen if the coordinates share the same names. + if(len(coords[new_coord]) + != len(cinst.data.coords[new_coord]) + or coords[new_coord].values + != cinst.data.coords[new_coord].values): + coords[new_coord] = establish_common_coord( + [coords[new_coord].values, + cinst.data.coords[new_coord].values], + common=common_coord) + + data = xr.Dataset(coords=coords) + + # Add the data and metadata from each instrument + for cinst in self.instruments: + cinst_str = '_'.join([attr for attr in [cinst.platform, cinst.name, + cinst.tag, cinst.inst_id] + if len(attr) > 0]) + for dvar in cinst.variables: + if dvar not in self.variables: + dname = '_'.join([dvar, cinst_str]) + else: + dname = dvar + + # Determine whether or not this data type is interpolatable + fill_meth = fill_method + interp_flag = False + if cinst[dvar].dtype in interp_type: + if fill_meth not in fill_methods and fill_meth is not None: + interp_flag = True + else: + if fill_meth not in fill_methods and fill_meth is not None: + fill_meth = 'nearest' + + # Assign the metadata, if it exists (e.g., not 'time') + if dvar in cinst.meta: + inst.meta[dname] = cinst.meta[dvar] + fill_val = cinst.meta[dvar, cinst.meta.labels.fill_val] + else: + fill_val = np.nan + + # Populate the data on the common coordinates + if cinst.pandas_format or (len(cinst[dvar].dims) == 1 + and 'time' in cinst[dvar].dims): + try: + if cinst.pandas_format: + ivals = cinst[dvar][coords['time']] + else: + ivals = pds.Series( + cinst[dvar].sel({'time': + coords['time']}).values, + index=coords['time']) + except KeyError: + # Not all common times are present, need to fill + # or interpolate + if fill_meth is None: + # Pad the data will fill values and then select + if cinst.pandas_format: + cinst_temp = cinst[dvar].copy() + else: + cinst_temp = cinst[dvar].to_pandas() + new_times = [dtime for dtime in coords['time'] + if dtime not in cinst_temp.index] + fill_data = pds.Series(fill_val, index=new_times) + cinst_temp = pds.concat([cinst_temp, fill_data]) + cinst_temp = cinst_temp.sort_index() + ivals = cinst_temp[coords['time']] + else: + if cinst.pandas_format: + cinst_temp = cinst[dvar].to_xarray() + if cinst[dvar].index.name != 'time': + cinst_temp = cinst_temp.rename({ + cinst[dvar].index.name: 'time'}) + else: + cinst_temp = cinst[dvar].copy() + + if interp_flag: + ivals = cinst_temp.interp(coords=fill_coords, + method=fill_meth) + else: + ivals = cinst_temp.sel(fill_coords, + method=fill_meth) + + # Get the data from the xarray object + ivals = pds.Series(ivals.values, + index=coords['time']) + + # Extend the data + if inst.pandas_format: + val_dict = {dname: ivals} + else: + 
val_dict = {dname: (('time'), ivals)} + data = data.assign(**val_dict) + elif dvar not in coords.keys(): + sel_dict = {dim: coords[dim] for dim in cinst[dvar].dims} + + if fill_meth is None: + cinst_temp = cinst[dvar].copy() + sel_key = '' + while sel_key is not None: + try: + ivals = cinst_temp.sel(sel_dict, + method=fill_meth) + sel_key = None + except KeyError as kerr: + # Get the coordinate with values that need to + # to be filled + sel_key = str(kerr).split('index')[-1].split( + "'")[1] + + # Set the coordinates for filling + new_coord = {sel_key: [ + cc for cc in sel_dict[sel_key] + if cc not in cinst_temp.coords[sel_key]]} + for dim in sel_dict.keys(): + if dim not in new_coord.keys(): + new_coord[dim] = sel_dict[dim] + + # Create a DataArray with fill data + fill_data = xr.DataArray(data=fill_val, + coords=new_coord, + dims=cinst_temp.dims) + + if len(fill_data.coords.keys()) < len( + cinst_temp.coords.keys()): + fill_data = fill_data.assign_coords( + {ckey: cinst_temp.coords[ckey] + for ckey in cinst_temp.coords.keys() + if ckey not in sel_dict.keys()}) + + # Merge the data objects + cinst_temp = xr.concat([cinst_temp, fill_data], + sel_key) + elif interp_flag: + ivals = cinst[dvar].interp(coords=sel_dict, + method=fill_meth) + else: + ivals = cinst[dvar].sel(sel_dict, method=fill_meth) + + # Assign the interpolated data + data = data.assign({dname: (cinst[dvar].dims, + ivals.values)}) + + inst.data = data + return inst + def today(self): """Obtain UTC date for today, see pysat.Instrument for details.""" @@ -694,7 +897,7 @@ def custom_attach(self, function, apply_inst=True, at_pos='end', argument. See Also - --------- + -------- Instrument.custom_attach : base method for attaching custom functions """ @@ -712,7 +915,7 @@ def custom_clear(self): """Clear the custom function list. See Also - --------- + -------- Instrument.custom_clear : base method for clearing custom functions """ @@ -737,7 +940,7 @@ def load(self, *args, **kwargs): References a dict of input keyword arguments See Also - --------- + -------- Instrument.load : base method for loading Instrument data """ @@ -767,7 +970,7 @@ def download(self, *args, **kwargs): References a dict of input keyword arguments See Also - --------- + -------- Instrument.download : base method for loading Instrument data Note diff --git a/pysat/_instrument.py b/pysat/_instrument.py index f7f4dd047..744b6c302 100644 --- a/pysat/_instrument.py +++ b/pysat/_instrument.py @@ -90,13 +90,12 @@ class Instrument(object): of files found will be checked to ensure the filesizes are greater than zero. Empty files are removed from the stored list of files. (default=False) - labels : dict + labels : dict or NoneType Dict where keys are the label attribute names and the values are tuples - that have the label values and value types in that order. - (default={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', float), - 'max_val': ('value_max', float), 'fill_val': ('fill', float)}) + that have the label values and value types in that order. If None uses + the Meta defaults. 
Deprecated, use `meta_kwargs` (default=None) + meta_kwargs : dict or NoneType + Dict to specify custom Meta initialization (default=None) custom : list or NoneType Input list containing dicts of inputs for `custom_attach` method inputs that may be applied or None (default=None) @@ -148,8 +147,8 @@ class Instrument(object): day meta : pysat.Meta Class holding the instrument metadata - meta_labels : dict - Dict containing defaults for new Meta data labels + meta_kwargs : dict + Dict containing defaults for Meta data orbits : pysat.Orbits Interface to extracting data orbit-by-orbit pandas_format : bool @@ -253,12 +252,7 @@ def __init__(self, platform=None, name=None, tag='', inst_id='', orbit_info=None, inst_module=None, data_dir='', directory_format=None, file_format=None, temporary_file_list=False, strict_time_flag=True, - ignore_empty_files=False, - labels={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), - 'fill_val': ('fill', np.float64)}, + ignore_empty_files=False, labels=None, meta_kwargs=None, custom=None, **kwargs): """Initialize `pysat.Instrument` object.""" @@ -429,8 +423,15 @@ def __init__(self, platform=None, name=None, tag='', inst_id='', # Create Meta instance with appropriate labels. Meta class methods will # use Instrument definition of MetaLabels over the Metadata declaration. - self.meta_labels = labels - self.meta = pysat.Meta(labels=self.meta_labels) + self.meta_kwargs = {} if meta_kwargs is None else meta_kwargs + + if labels is not None: + warnings.warn("".join(["`labels` is deprecated, use `meta_kwargs`", + "with the 'labels' key instead. Support ", + "for `labels` will be removed in v3.2.0+"]), + DeprecationWarning, stacklevel=2) + self.meta_kwargs["labels"] = labels + self.meta = pysat.Meta(**self.meta_kwargs) self.meta.mutable = False # Nano-kernel processing variables. Feature processes data on each load. @@ -589,8 +590,15 @@ def __eq__(self, other): else: # General check for everything else - checks.append(np.all(self.__dict__[key] - == other.__dict__[key])) + if isinstance(self.__dict__[key], dict): + try: + checks.append(str(self.__dict__[key]) + == str(other.__dict__[key])) + except AttributeError: + return False + else: + checks.append(np.all(self.__dict__[key] + == other.__dict__[key])) else: # Both objects don't have the same attached objects @@ -655,7 +663,7 @@ def __repr__(self): "', pad={:}, orbit_info=".format(self.pad), "{:}, ".format(self.orbit_info), "inst_module=", istr, ", custom=", cstr, - ", **{:}".format(in_kwargs), ")"]) + ", **{:}".format(repr(in_kwargs)), ")"]) return out_str @@ -675,9 +683,20 @@ def __str__(self): output_str += '---------------\n' output_str += "Cleaning Level: '{:s}'\n".format(self.clean_level) output_str += 'Data Padding: {:s}\n'.format(self.pad.__str__()) - for routine in self.kwargs.keys(): - output_str += 'Keyword Arguments Passed to {:s}: '.format(routine) - output_str += "{:s}\n".format(self.kwargs[routine].__str__()) + + # Total kwargs passed to each routine. + num_kwargs = sum([len(self.kwargs[routine].keys()) + for routine in self.kwargs.keys()]) + if num_kwargs > 0: + output_str += 'Keyword Arguments Passed: \n' + + for routine in self.kwargs.keys(): + # Only output for routine if kwargs are present. 
+ if len(self.kwargs[routine].keys()) > 0: + output_str += " To {:s}: \n".format(routine) + for key in self.kwargs[routine].keys(): + output_str += " '{:s}': {:s}\n".format( + key, str(self.kwargs[routine][key])) num_funcs = len(self.custom_functions) output_str += "Custom Functions: {:d} applied\n".format(num_funcs) @@ -765,7 +784,7 @@ def __getitem__(self, key): # Slicing by date, inclusive. inst[datetime1:datetime2, 'name'] - # Slicing by name and row/date + # Slicing by name and row/date (pandas only) inst[datetime1:datetime2, 'name1':'name2'] """ @@ -845,10 +864,8 @@ def __getitem_xarray__(self, key): # Slicing by date, inclusive inst[datetime1:datetime2, 'name'] - # Slicing by name and row/date - inst[datetime1:datetime2, 'name1':'name2'] - """ + if 'Epoch' in self.data.indexes: epoch_name = 'Epoch' elif 'time' in self.data.indexes: @@ -957,6 +974,10 @@ def __setitem__(self, key, new_data): If tuple not used when assigning dimensions for new multidimensional data. + Warnings + -------- + If a single new value is set, the value will be broadcast over time. + Note ---- If no metadata provided and if metadata for 'name' not already stored @@ -967,6 +988,13 @@ def __setitem__(self, key, new_data): new = copy.deepcopy(new_data) + # Initialize as empty dict. + if self.meta._data_types is None: + mutable = self.meta.mutable + self.meta.mutable = True + self.meta._data_types = {} + self.meta.mutable = mutable + # Add data to main pandas.DataFrame, depending upon the input # slice, and a name if self.pandas_format: @@ -981,6 +1009,8 @@ def __setitem__(self, key, new_data): # for list, array. Assume key[0] is integer # (including list or slice). self.data.loc[self.data.index[key[0]], key[1]] = new + + self._update_data_types(key[1]) self.meta[key[1]] = {} return elif not isinstance(new, dict): @@ -1011,12 +1041,14 @@ def __setitem__(self, key, new_data): # subvariables. Meta can filter out empty metadata as # needed, the check above reduces the need to create # Meta instances. - ho_meta = pysat.Meta(labels=self.meta_labels) + ho_meta = pysat.Meta(**self.meta_kwargs) ho_meta[in_data[0].columns] = {} self.meta[key] = ho_meta # Assign data and any extra metadata self.data[key] = in_data + self._update_data_types(key) + self.meta[key] = new else: @@ -1050,42 +1082,59 @@ def __setitem__(self, key, new_data): # Try loading indexed as integers self.data[key[-1]][indict] = in_data + self._update_data_types(key[-1]) self.meta[key[-1]] = new return elif isinstance(key, str): # Assigning basic variables - if isinstance(in_data, xr.DataArray): - # If xarray input, take as is + if isinstance(in_data, (xr.DataArray, tuple)): + # If xarray or tuple input, take as is self.data[key] = in_data - elif len(np.shape(in_data)) == 1: + elif len(np.shape(in_data)) <= 1: # If not an xarray input, but still iterable, then we # go through to process the 1D input - if len(in_data) == len(self.index): + if np.shape(in_data) == np.shape(self.index): # 1D input has the correct length for storage along - # 'Epoch' + # 'Epoch'. self.data[key] = (epoch_name, in_data) - elif len(in_data) == 1: - # Only provided a single number in iterable, make that - # the input for all times - self.data[key] = (epoch_name, - [in_data[0]] * len(self.index)) + elif len(np.shape(in_data)) == 0 or len(in_data) == 1: + # Only a single number, or single in iterable. + if key in self.variables: + # If it already exists, assign as defined. 
+ in_data = np.squeeze(in_data) + if np.shape(self.data[key]) == np.shape(in_data): + self.data[key] = in_data + else: + raise ValueError(' '.join(('Shape of input', + 'does not match', + 'existing shape of', + key))) + else: + # Otherwise broadcast over time. + warnings.warn(' '.join(('Input for {:}'.format(key), + 'is a single value.', + 'Broadcast over epoch.'))) + in_data = pysat.utils.listify(in_data) + self.data[key] = (epoch_name, + in_data * len(self.index)) elif len(in_data) == 0: # Provided an empty iterable, make everything NaN + warnings.warn(' '.join(('Input for {:} is'.format(key), + 'empty. Setting to broadcast', + 'as NaN over epoch.'))) self.data[key] = (epoch_name, [np.nan] * len(self.index)) - elif len(np.shape(in_data)) == 0: - # Not an iterable input, rather a single number. Make - # that number the input for all times. - self.data[key] = (epoch_name, [in_data] * len(self.index)) + else: + raise ValueError(' '.join(('Input for {:}'.format(key), + 'does not match expected', + 'dimensions. Value not', + 'set.'))) else: # Multidimensional input that is not an xarray. The user # needs to provide everything that is required for success. - if isinstance(in_data, tuple): - self.data[key] = in_data - else: - raise ValueError(' '.join(('Must provide dimensions', - 'for xarray multidim', - 'data using input tuple.'))) + # Passes the data through to get appropriate error from + # xarray. + self.data[key] = in_data elif hasattr(key, '__iter__'): # Multiple input strings (keys) are provided, but not in tuple @@ -1093,6 +1142,8 @@ def __setitem__(self, key, new_data): # individually. for keyname in key: self.data[keyname] = in_data[keyname] + self.meta._data_types[keyname] = self.data[ + keyname].values.dtype.type # Attach metadata self.meta[key] = new @@ -1141,6 +1192,10 @@ def __iter__(self): elif self._iter_type == 'date': # Iterate over dates. A list of dates is generated whenever # bounds are set. + + if len(self._iter_list) == 0: + raise IndexError('No dates to iterate over, check bounds') + for date in self._iter_list: # Use a copy trick, starting with null data in object self.data = self._null_data @@ -1447,6 +1502,64 @@ def _assign_attrs(self, by_name=False): '{:}'.format(missing)])) return + def _assign_attrs_from_const(self, const): + """Update Instrument attributes using Constellation data. + + Parameters + ---------- + const : pysat.Constellation + Data contained in a Constellation of Instruments + + Note + ---- + Updates the 'platform', 'name', 'tag', 'inst_id', 'clean_level', + 'pad', 'clean_level', 'date', 'doy', 'yr', 'acknowledgements', + 'references', and 'pandas_format' attributes + + """ + # Define the reference variables + up_attrs = ['platform', 'name', 'tag', 'inst_id', 'clean_level', 'pad', + 'pandas_format', 'date', 'doy', 'acknowledgements', 'yr', + 'references'] + clean_rank = {'clean': 4, 'dusty': 3, 'dirty': 2, 'none': 1, None: 1} + clean_assign = {4: 'clean', 3: 'dusty', 2: 'dirty', 1: 'none'} + + for attr in up_attrs: + cattr = [getattr(cinst, attr) for cinst in const.instruments] + cattr = list(set(cattr)) # np.unique doesn't work with pad + + if attr == 'pad': + # Set the pad value to the longest padding, if padding was set + cattr = [ca for ca in cattr if ca is not None] + if len(cattr) == 1: + # There was only one set pad, use it + setattr(self, attr, cattr[0]) + elif len(cattr) > 1: + # There are multiple pads, they can be reliably compared + # using the `freqstr` attribute, which is lowest for the + # longest pad period. 
+                    long_pad = cattr[0]
+                    for i in range(1, len(cattr)):
+                        if long_pad.freqstr > cattr[i].freqstr:
+                            long_pad = cattr[i]
+                    setattr(self, attr, long_pad)
+            elif len(cattr) == 1:
+                # There is only one value, set it
+                setattr(self, attr, cattr[0])
+            elif attr == 'clean_level':
+                # Set the clean value to the lowest cleaning rank
+                crank = [clean_rank[cl] for cl in cattr]
+                setattr(self, attr, clean_assign[min(crank)])
+            elif attr == 'pandas_format':
+                # If there is a mix of pandas and xarray data, use xarray
+                setattr(self, attr, False)
+            else:
+                # Combine all unique attributes as a string
+                astr = "\n" if attr in ['acknowledgements',
+                                        'references'] else "_"
+                setattr(self, attr, astr.join(cattr))
+        return
+
     def _load_data(self, date=None, fid=None, inc=None, load_kwargs=None):
         """Load data for an instrument on given date or filename index.
 
@@ -1514,12 +1627,12 @@ def _load_data(self, date=None, fid=None, inc=None, load_kwargs=None):
             except pds.errors.OutOfBoundsDatetime:
                 bad_datetime = True
                 data = self._null_data.copy()
-                mdata = pysat.Meta(labels=self.meta_labels)
+                mdata = pysat.Meta(**self.meta_kwargs)
 
         else:
             bad_datetime = False
             data = self._null_data.copy()
-            mdata = pysat.Meta(labels=self.meta_labels)
+            mdata = pysat.Meta(**self.meta_kwargs)
 
         output_str = '{platform} {name} {tag} {inst_id}'
         output_str = output_str.format(platform=self.platform,
@@ -1803,6 +1916,30 @@ def _filter_netcdf4_metadata(self, mdata_dict, coltype, remove=False,
     # -----------------------------------------------------------------------
     # Define all accessible methods
 
+    @property
+    def meta_labels(self):
+        """Provide Meta input for labels kwarg, deprecated.
+
+        Returns
+        -------
+        dict
+            Either Meta default provided locally or custom value provided
+            by user and stored in `meta_kwargs['labels']`
+
+        """
+        warnings.warn("".join(["Deprecated attribute, returns `meta_kwargs",
+                               "['labels']` or Meta defaults if not set. Will",
+                               " be removed in pysat 3.2.0+"]),
+                      DeprecationWarning, stacklevel=2)
+        if 'labels' in self.meta_kwargs.keys():
+            return self.meta_kwargs['labels']
+        else:
+            return {'units': ('units', str), 'name': ('long_name', str),
+                    'notes': ('notes', str), 'desc': ('desc', str),
+                    'min_val': ('value_min', (float, int)),
+                    'max_val': ('value_max', (float, int)),
+                    'fill_val': ('fill', (float, int, str))}
+
     @property
     def bounds(self):
         """Boundaries for iterating over instrument object by date or file.
 
@@ -1810,20 +1947,22 @@ def bounds(self):
         Parameters
         ----------
         start : dt.datetime, str, or NoneType
-            Start of iteration, if None uses first data date.
-            list-like collection also accepted. (default=None)
+            Start of iteration, disregarding any time of day information.
+            If None uses first data date. List-like collection also
+            accepted, allowing multiple bound ranges. (default=None)
         stop : dt.datetime, str, or None
-            Stop of iteration, inclusive. If None uses last data date.
-            list-like collection also accepted. (default=None)
+            Stop of iteration, inclusive of the entire day regardless of
+            time of day in the bounds. If None uses last data date.
+            List-like collection also accepted, allowing multiple bound
+            ranges, though it must match `start`. (default=None)
         step : str, int, or NoneType
            Step size used when iterating from start to stop. Use a
-           Pandas frequency string ('3D', '1M') when setting bounds by date,
-           an integer when setting bounds by file. Defaults to a single
-           day/file (default='1D', 1). 
+ Pandas frequency string ('3D', '1M') or an integer (will assume + a base frequency equal to the file frequency). If None, defaults to + a single unit of file frequency (typically 1 day) (default=None). width : pandas.DateOffset, int, or NoneType - Data window used when loading data within iteration. Defaults to a - single day/file if not assigned. (default=dt.timedelta(days=1), - 1) + Data window used when loading data within iteration. If None, + defaults to a single file frequency (typically 1 day) (default=None) Raises ------ @@ -1903,6 +2042,19 @@ def bounds(self, value=None): else: raise ValueError('Too many input arguments.') + # Determine the value of the file frequency + if hasattr(self.files.files.index, "freqstr"): + file_freq = self.files.files.index.freqstr + if file_freq is None: + if len(self.files.files.index) > 1: + # The frequency needs to be calculated + file_freq = pysat.utils.time.calc_freq( + self.files.files.index) + else: + file_freq = '1D' # This is the pysat default + else: + file_freq = '1D' # This is the pysat default + # Pull out start and stop times now that other optional items have # been checked out. start = value[0] @@ -1913,13 +2065,18 @@ def bounds(self, value=None): self._iter_start = [self.files.start_date] self._iter_stop = [self.files.stop_date] self._iter_type = 'date' + + # Set the hidden iteration parameters if not specified if self._iter_step is None: - self._iter_step = '1D' + self._iter_step = file_freq + if self._iter_width is None: - self._iter_width = dt.timedelta(days=1) + self._iter_width = pds.tseries.frequencies.to_offset(file_freq) + if self._iter_start[0] is not None: # There are files. Use those dates. - ustops = [istop - self._iter_width + dt.timedelta(days=1) + ustops = [istop - self._iter_width + + pds.tseries.frequencies.to_offset(self._iter_step) for istop in self._iter_stop] ufreq = self._iter_step self._iter_list = pysat.utils.time.create_date_range( @@ -1966,6 +2123,7 @@ def bounds(self, value=None): # with first/last, as appropriate. if starts[0] is None: starts = [self.files[0]] + if stops[0] is None: stops = [self.files[-1]] @@ -1973,6 +2131,13 @@ def bounds(self, value=None): if self._iter_step is None: self._iter_step = 1 + if type(self._iter_step) not in [int, np.int32, np.int64]: + # Convert from a frequency string to an index + soff = pds.tseries.frequencies.to_offset(self._iter_step) + foff = pds.tseries.frequencies.to_offset( + self.files.files.index.freq) + self._iter_step = int(soff.n / foff.n) + # Default window size if self._iter_width is None: self._iter_width = 1 @@ -2013,41 +2178,60 @@ def bounds(self, value=None): # Default step size if self._iter_step is None: - self._iter_step = '1D' + self._iter_step = file_freq # Default window size if self._iter_width is None: - self._iter_width = dt.timedelta(days=1) - - # Create list-like of dates for iteration - starts = pysat.utils.time.filter_datetime_input(starts) - stops = pysat.utils.time.filter_datetime_input(stops) - freq = self._iter_step - width = self._iter_width - - # Ensure inputs are in reasonable date order - for start, stop in zip(starts, stops): - if start > stop: - estr = ' '.join(('Bounds must be set in increasing', - 'date order.', - start.strftime('%d %B %Y'), - 'is later than', - stop.strftime('%d %B %Y'))) - raise ValueError(estr) - - # Account for width of load. Don't extend past bound. 
- ustops = [stop - width + dt.timedelta(days=1) - for stop in stops] + self._iter_width = pds.tseries.frequencies.to_offset( + file_freq) + + # Identify start/stop pairings that lie outside of the file list + good_bounds = list() + for i, start in enumerate(starts): + # Ensure there are files + if self.files.stop_date is not None: + # Ensure the start and stop times intersect with + # the file list + if(start <= self.files.stop_date + and stops[i] >= self.files.start_date): + good_bounds.append(i) + + if len(good_bounds) > 0: + # Create list-like of dates for iteration + starts = list(pysat.utils.time.filter_datetime_input( + np.asarray(starts)[good_bounds])) + stops = list(pysat.utils.time.filter_datetime_input( + np.asarray(stops)[good_bounds])) + file_inc = pds.tseries.frequencies.to_offset(file_freq) + + # Ensure inputs are in reasonable date order + for start, stop in zip(starts, stops): + if start > stop: + estr = ' '.join(('Bounds must be set in increasing', + 'date order.', + start.strftime('%d %B %Y'), + 'is later than', + stop.strftime('%d %B %Y'))) + raise ValueError(estr) + + # Account for width of load. Don't extend past bound. To + # avoid pandas shenanigans, only perform adjustments to the + # stop time if the file width and file increment are unequal + ustops = [stop if self._iter_width == file_inc else + stop - self._iter_width + file_inc + for stop in stops] + + # Date range is inclusive, no need to pad. + self._iter_list = pysat.utils.time.create_date_range( + starts, ustops, freq=self._iter_step) + else: + self._iter_list = [] - # Date range is inclusive, no need to pad. - self._iter_list = pysat.utils.time.create_date_range(starts, - ustops, - freq=freq) - # go back to time index + # Convert the date range back to a time index format self._iter_list = pds.DatetimeIndex(self._iter_list) else: - raise ValueError(' '.join(('Input is not a known type, string', + raise ValueError(' '.join(('Input is not a known type: string', 'or datetime'))) self._iter_start = starts self._iter_stop = stops @@ -2205,9 +2389,24 @@ def concat_data(self, new_data, prepend=False, **kwargs): kwargs['sort'] = False concat_func = pds.concat else: + # Ensure the dimensions are equal + equal_dims = True + idat = 0 + while idat < len(new_data) - 1 and equal_dims: + if new_data[idat].dims != new_data[idat + 1].dims: + equal_dims = False + idat += 1 + + if not equal_dims: + # Update the dimensions, padding data where necessary + new_data = pysat.utils.coords.expand_xarray_dims( + new_data, self.meta, exclude_dims=['time']) + # Specify the dimension, if not otherwise specified if 'dim' not in kwargs: kwargs['dim'] = self.index.name + + # Set the concat function concat_func = xr.concat # Assign the concatenated data to the instrument @@ -2402,38 +2601,34 @@ def next(self, verifyPad=False): self.load(date=date, end_date=end_date, verifyPad=verifyPad) elif self._iter_type == 'file': - first = self.files.get_index(self._iter_list[0]) - last = self.files.get_index(self._iter_list[-1]) - step = self._iter_step - width = self._iter_width if self._fid is not None: - # Data already loaded in `.data` - if (self._fid < first) | (self._fid + step > last): + # Data already loaded in `.data`. Step size is always accounted + # for in the list of files. Get location of current file in + # iteration list. 
+ idx = None + fname = self.files[self._fid] + for i, name in enumerate(self._iter_list): + if name == fname: + idx = i + break + + if idx is None: + estr = ''.join(('Unable to find loaded filename in the ', + 'supported iteration list. Please check ', + 'the Instrument bounds, `self.bounds` for', + ' supported iteration ranges.')) + raise StopIteration(estr) + elif idx >= len(self._iter_list) - 1: raise StopIteration('Outside the set file boundaries.') - else: - # Step size already accounted for in the list of files. Get - # location of current file in iteration list. - idx = None - fname = self.files[self._fid] - for i, name in enumerate(self._iter_list): - if name == fname: - idx = i - break - if idx is None: - estr = ''.join(('Unable to find loaded filename ', - 'in the supported iteration list. ', - 'Please check the Instrument bounds, ', - '`self.bounds` for supported iteration', - 'ranges.')) - raise StopIteration(estr) - fname = self._iter_list[idx + 1] + + fname = self._iter_list[idx + 1] else: # No data loaded yet, start with the first file fname = self._iter_list[0] # Get location for second file. Note a width of 1 loads single file. # Load range of files. - nfid = self.files.get_index(fname) + width - 1 + nfid = self.files.get_index(fname) + self._iter_width - 1 self.load(fname=fname, stop_fname=self.files[nfid], verifyPad=verifyPad) @@ -2489,33 +2684,30 @@ def prev(self, verifyPad=False): self.load(date=date, end_date=end_date, verifyPad=verifyPad) elif self._iter_type == 'file': - first = self.files.get_index(self._iter_list[0]) - last = self.files.get_index(self._iter_list[-1]) - step = self._iter_step - width = self._iter_width if self._fid is not None: - if (self._fid - step < first) or (self._fid > last): + # Find location of the desired file, recall that step size is + # already accounted for when createing the iteration file list + idx = None + fname = self.files[self._fid] + for i, name in enumerate(self._iter_list): + if name == fname: + idx = i + break + + if idx is None: + estr = ''.join(('Unable to find loaded filename in the ', + 'supported iteration list. Please check ', + 'the Instrument bounds, `self.bounds` for', + ' supported iteration ranges.')) + raise StopIteration(estr) + elif idx == 0: raise StopIteration('Outside the set file boundaries.') - else: - # Find location of the desired file - idx = None - fname = self.files[self._fid] - for i, name in enumerate(self._iter_list): - if name == fname: - idx = i - break - if idx is None: - estr = ''.join(('Unable to find loaded filename ', - 'in the supported iteration list. 
', - 'Please check the Instrument bounds, ', - '`self.bounds` for supported iteration', - 'ranges.')) - raise StopIteration(estr) - fname = self._iter_list[idx - 1] + + fname = self._iter_list[idx - 1] else: fname = self._iter_list[-1] - nfid = self.files.get_index(fname) + width - 1 + nfid = self.files.get_index(fname) + self._iter_width - 1 self.load(fname=fname, stop_fname=self.files[nfid], verifyPad=verifyPad) @@ -3029,12 +3221,22 @@ def load(self, yr=None, doy=None, end_yr=None, end_doy=None, date=None, self._next_data, self._next_meta = self._load_next() # Make sure datetime indices for all data is monotonic + if self.pandas_format: + sort_method = "sort_index" + sort_args = [] + else: + sort_method = 'sortby' + sort_args = ['time'] + if not self._index(self._prev_data).is_monotonic_increasing: - self._prev_data.sort_index(inplace=True) + self._prev_data = getattr(self._prev_data, + sort_method)(*sort_args) if not self._index(self._curr_data).is_monotonic_increasing: - self._curr_data.sort_index(inplace=True) + self._curr_data = getattr(self._curr_data, + sort_method)(*sort_args) if not self._index(self._next_data).is_monotonic_increasing: - self._next_data.sort_index(inplace=True) + self._next_data = getattr(self._next_data, + sort_method)(*sort_args) # Make tracking indexes consistent with new loads if self._load_by_date: @@ -3110,19 +3312,20 @@ def load(self, yr=None, doy=None, end_yr=None, end_doy=None, date=None, # and xarray self.data = self._next_data.copy() self.data = self[temp_time:last_pad] - if not self.empty: + if len(self.index) > 0: if (self.index[0] == temp_time): self.data = self[1:] self.concat_data(stored_data, prepend=True) else: self.data = stored_data - self.data = self[first_pad:last_pad] + if len(self.index) > 0: + self.data = self[first_pad:last_pad] - # Want exclusive end slicing behavior from above - if not self.empty: - if (self.index[-1] == last_pad) & (not want_last_pad): - self.data = self[:-1] + # Want exclusive end slicing behavior from above + if not self.empty: + if (self.index[-1] == last_pad) & (not want_last_pad): + self.data = self[:-1] else: # If self.pad is False, load single day @@ -3181,6 +3384,34 @@ def load(self, yr=None, doy=None, end_yr=None, end_doy=None, date=None, else: warnings.warn(message, stacklevel=2) + # Transfer any extra attributes in meta to the Instrument object. + # Metadata types need to be initialized before preprocess is run. + # TODO(#1020): Change the way this kwarg is handled + if use_header or ('use_header' in self.kwargs['load'] + and self.kwargs['load']['use_header']): + self.meta.transfer_attributes_to_header() + else: + warnings.warn(''.join(['Meta now contains a class for global ', + 'metadata (MetaHeader). Default attachment ', + 'of global attributes to Instrument will ', + 'be Deprecated in pysat 3.2.0+. Set ', + '`use_header=True` in this load call or ', + 'on Instrument instantiation to remove this', + ' warning.']), DeprecationWarning, + stacklevel=2) + self.meta.transfer_attributes_to_instrument(self) + + # Transfer loaded data types to meta. 
+ self.meta.mutable = True + if self.meta._data_types is None: + self.meta._data_types = {} + for key in self.variables: + data_type = self.data[key].dtype.type + self.meta._data_types[key] = data_type + + self.meta.mutable = False + sys.stdout.flush() + # Apply the instrument preprocess routine, if data present if not self.empty: # Does not require self as input, as it is a partial func @@ -3201,23 +3432,6 @@ def load(self, yr=None, doy=None, end_yr=None, end_doy=None, date=None, if (self.index[-1] == last_time) & (not want_last_pad): self.data = self[:-1] - # Transfer any extra attributes in meta to the Instrument object. - # TODO(#1020): Change the way this kwarg is handled - if use_header or ('use_header' in self.kwargs['load'] - and self.kwargs['load']['use_header']): - self.meta.transfer_attributes_to_header() - else: - warnings.warn(''.join(['Meta now contains a class for global ', - 'metadata (MetaHeader). Default attachment ', - 'of global attributes to Instrument will ', - 'be Deprecated in pysat 3.2.0+. Set ', - '`use_header=True` in this load call or ', - 'on Instrument instantiation to remove this', - ' warning.']), DeprecationWarning, - stacklevel=2) - self.meta.transfer_attributes_to_instrument(self) - self.meta.mutable = False - sys.stdout.flush() return def remote_file_list(self, start=None, stop=None, **kwargs): @@ -3248,10 +3462,12 @@ def remote_file_list(self, start=None, stop=None, **kwargs): return a subset of available files. """ + # Add the method kwargs if they are not set to defaults + if start is not None: + kwargs["start"] = start - # Add the function kwargs - kwargs["start"] = start - kwargs["stop"] = stop + if stop is not None: + kwargs["stop"] = stop # Add the user-supplied kwargs rtn_key = 'list_remote_files' @@ -3555,7 +3771,7 @@ def download(self, start=None, stop=None, date_array=None, def to_netcdf4(self, fname=None, base_instrument=None, epoch_name=None, zlib=False, complevel=4, shuffle=True, preserve_meta_case=False, export_nan=None, - unlimited_time=True, modify=False): + export_pysat_info=True, unlimited_time=True, modify=False): """Store loaded data into a netCDF4 file. .. deprecated:: 3.0.2 @@ -3597,6 +3813,9 @@ def to_netcdf4(self, fname=None, base_instrument=None, epoch_name=None, included will be written to the file. If not listed and a value is NaN then that attribute simply won't be included in the netCDF4 file. (default=None) + export_pysat_info : bool + If True, platform, name, tag, and inst_id will be appended to the + metadata. (default=True) unlimited_time : bool Flag specifying whether or not the epoch/time dimension should be unlimited; it is when the flag is True. (default=True) @@ -3632,10 +3851,27 @@ def to_netcdf4(self, fname=None, base_instrument=None, epoch_name=None, complevel=complevel, shuffle=shuffle, preserve_meta_case=preserve_meta_case, export_nan=export_nan, + export_pysat_info=export_pysat_info, unlimited_time=unlimited_time) return + def _update_data_types(self, key): + """Update the data types in pysat.Meta object. + + Parameters + ---------- + key : str or list-like + Data variable or list of these data variables for which the data + type should be updated. 
+ + """ + + for lkey in pysat.utils.listify(key): + if not isinstance(lkey, slice) and lkey in self.variables: + self.meta._data_types[lkey] = self.data[lkey].values.dtype.type + return + # ---------------------------------------------------------------------------- # Utilities and variables supporting the Instrument Object diff --git a/pysat/_meta.py b/pysat/_meta.py index 5bfb3414e..85d5c8d54 100644 --- a/pysat/_meta.py +++ b/pysat/_meta.py @@ -34,13 +34,17 @@ class Meta(object): that have the label values and value types in that order. (default={'units': ('units', str), 'name': ('long_name', str), 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', float), 'max_val': ('value_max', float), - 'fill_val': ('fill', float)}) + 'min_val': ('value_min', (float, int)), + 'max_val': ('value_max', (float, int)), + 'fill_val': ('fill', (float, int, str))}) export_nan : list or NoneType List of labels that should be exported even if their value is NaN or None for an empty list. When used, metadata with a value of NaN will be excluded from export. Will always allow NaN export for labels of the float type. (default=None) + data_types : dict or NoneType + Dict of data types for variables names or None to determine after + loading the data. (default=None) Attributes ---------- @@ -163,9 +167,10 @@ class Meta(object): def __init__(self, metadata=None, header_data=None, labels={'units': ('units', str), 'name': ('long_name', str), 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', float), - 'max_val': ('value_max', float), - 'fill_val': ('fill', float)}, export_nan=None): + 'min_val': ('value_min', (float, int)), + 'max_val': ('value_max', (float, int)), + 'fill_val': ('fill', (float, int, str))}, + export_nan=None, data_types=None): """Initialize `pysat.Meta` object.""" # Set mutability of Meta attributes. This flag must be set before # anything else, or `__setattr__` breaks. @@ -177,12 +182,16 @@ def __init__(self, metadata=None, header_data=None, # Set the NaN export list self._export_nan = [] if export_nan is None else export_nan for lvals in labels.values(): - if lvals[0] not in self._export_nan and lvals[1] == float: + if(lvals[0] not in self._export_nan + and float in pysat.utils.listify(lvals[1])): self._export_nan.append(lvals[0]) # Set the labels self.labels = MetaLabels(metadata=self, **labels) + # Set the data types, if provided + self._data_types = data_types + # Initialize higher order (nD) data structure container, a dict self._ho_data = {} @@ -366,11 +375,18 @@ def __setitem__(self, data_vars, input_dat): data_vars = self.var_case_name(data_vars) meta_vars = list(self.keys()) def_vars = list() + data_var_types = None if self._data_types is None else list() for var in data_vars: if var not in meta_vars: def_vars.append(var) + if data_var_types is not None: + if var in self._data_types.keys(): + data_var_types.append(self._data_types[var]) + else: + data_var_types.append(None) + if len(def_vars) > 0: - self._insert_default_values(def_vars) + self._insert_default_values(def_vars, data_var_types) # Check if input dict empty. If so, no metadata was assigned by # the user. 
This is an empty call and we can head out, @@ -412,7 +428,8 @@ def __setitem__(self, data_vars, input_dat): # If this is a disagreement between byte data # and an expected str, resolve it here if(isinstance(to_be_set, bytes) - and self.labels.label_type[iattr] == str): + and str in pysat.utils.listify( + self.labels.label_type[iattr])): to_be_set = core_utils.stringify(to_be_set) else: # This type is incorrect, try casting it @@ -424,16 +441,18 @@ def __setitem__(self, data_vars, input_dat): iattr])]) try: if hasattr(to_be_set, '__iter__'): - if self.labels.label_type[ - iattr] == str: + if str in pysat.utils.listify( + self.labels.label_type[ + iattr]): to_be_set = '\n\n'.join( [str(tval) for tval in to_be_set]) else: raise TypeError("can't recast") else: - to_be_set = self.labels.label_type[ - iattr](to_be_set) + to_be_set = pysat.utils.listify( + self.labels.label_type[ + iattr])[0](to_be_set) # Inform user data was recast pysat.logger.info(''.join(( @@ -774,28 +793,39 @@ def __eq__(self, other_meta): # ----------------------------------------------------------------------- # Define the hidden methods - def _insert_default_values(self, data_var): + def _insert_default_values(self, data_var, data_type=None): """Set the default label values for a data variable. Parameters ---------- data_var : str or list Single or multiple data variable name(s). + data_type : type, list, or NoneType + Type for the data value(s) or None if not specified (default=None) Note ---- Sets NaN for all float values, -1 for all int values, 'data_var' for names labels, '' for all other str values, and None for any other - data type. + data type. If there are multiple data types, sets the data type (if + included). Otherwise, chooses the first type in the tuple. """ # Cycle through each label type to create a list of label names # and label default values labels = list() + lattrs = list() default_vals = list() name_idx = None + need_data_type = dict() for i, lattr in enumerate(self.labels.label_type.keys()): labels.append(getattr(self.labels, lattr)) + lattrs.append(lattr) + if(isinstance(self.labels.label_type[lattr], tuple) + and data_type is not None): + need_data_type[lattr] = True + else: + need_data_type[lattr] = False if lattr in ['name']: default_vals.append('') @@ -805,10 +835,28 @@ def _insert_default_values(self, data_var): # Assign the default values to the DataFrame for this data variable(s). 
data_vars = pysat.utils.listify(data_var) - for var in data_vars: + if data_type is None: + var_types = [None for dvar in data_vars] + else: + var_types = pysat.utils.listify(data_type) + + for i, var in enumerate(data_vars): + # Use the label defaults if this variable doesn't need to consider + # the data type + if not np.any(list(need_data_type.values())): + data_default = list(default_vals) + else: + data_default = [ + self.labels.default_values_from_attr( + lattrs[j], var_types[i]) if need_data_type[lattrs[j]] + else val for j, val in enumerate(default_vals)] + + # The default value for the name must be set after to be consistent if name_idx is not None: - default_vals[name_idx] = var - self._data.loc[var, labels] = default_vals + data_default[name_idx] = var + + # Update the meta data to the desired defaults + self._data.loc[var, labels] = data_default return @@ -1618,19 +1666,22 @@ class MetaLabels(object): Parameters ---------- units : tuple - Units label name and value type (default=('units', str)) + Units label name and value type(s) (default=('units', str)) name : tuple - Name label name and value type (default=('long_name', str)) + Name label name and value type(s) (default=('long_name', str)) notes : tuple - Notes label name and value type (default=('notes', str)) + Notes label name and value type(s) (default=('notes', str)) desc : tuple - Description label name and value type (default=('desc', str)) + Description label name and value type(s) (default=('desc', str)) min_val : tuple - Minimum value label name and value type (default=('value_min', float)) + Minimum value label name and value type(s) + (default=('value_min', (float, int))) max_val : tuple - Maximum value label name and value type (default=('value_max', float)) + Maximum value label name and value type(s) + (default=('value_max', (float, int))) fill_val : tuple - Fill value label name and value type (default=('fill', float)) + Fill value label name and value type(s) + (default=('fill', (float, int, str))) kwargs : dict Dictionary containing optional label attributes, where the keys are the attribute names and the values are tuples containing the label name and @@ -1696,9 +1747,9 @@ class MetaLabels(object): def __init__(self, metadata=None, units=('units', str), name=('long_name', str), notes=('notes', str), - desc=('desc', str), min_val=('value_min', float), - max_val=('value_max', float), fill_val=('fill', float), - **kwargs): + desc=('desc', str), min_val=('value_min', (float, int)), + max_val=('value_max', (float, int)), + fill_val=('fill', (float, int, str)), **kwargs): """Initialize the MetaLabels class.""" # Initialize the coupled metadata @@ -1715,6 +1766,9 @@ def __init__(self, metadata=None, units=('units', str), min_val[0]: 'min_val', max_val[0]: 'max_val', fill_val[0]: 'fill_val'} + # Ensure the label types include numpy 64 bit types when appropriate + self._update_label_types() + # Ensure all standard label types are valid for label in self.label_type.keys(): if not self._eval_label_type(self.label_type[label]): @@ -1797,6 +1851,41 @@ def __str__(self): return out_str + def _update_label_types(self): + """Update the `label_type` attribute, adjusting types as appropriate.""" + + # Update the types as necessary + for lkey in self.label_type.keys(): + if self.label_type[lkey] == float: + self.label_type[lkey] = (float, np.float64, np.float32) + elif self.label_type[lkey] == int: + self.label_type[lkey] = (int, np.int64, np.int32, np.int16, + np.int8, bool) + elif self.label_type[lkey] == str: + 
self.label_type[lkey] = (str, np.str_) + elif self.label_type[lkey] == bool: + self.label_type[lkey] = (bool, np.bool_) + elif isinstance(self.label_type[lkey], tuple): + ltypes = list(self.label_type[lkey]) + + if float in ltypes: + ltypes.extend([np.float64, np.float32]) + + if int in ltypes: + ltypes.extend([np.int64, np.int32, np.int16, np.int8, bool]) + + if str in ltypes: + ltypes.append(np.str_) + + if bool in ltypes: + ltypes.append(np.bool_) + + # This may result in duplicate numpy types, but order is more + # important than carrying around a duplicate type, as the first + # type in the provided tuple is the default type + self.label_type[lkey] = tuple(ltypes) + return + def _eval_label_type(self, val_type): """Evaluate the label type for validity. @@ -1828,13 +1917,15 @@ def _eval_label_type(self, val_type): return valid - def default_values_from_type(self, val_type): + def default_values_from_type(self, val_type, data_type=None): """Retrieve the default values for each label based on their type. Parameters ---------- val_type : type Variable type for the value to be assigned to a MetaLabel + data_type : type or NoneType + Type for the data values or None if not specified (default=None) Returns ------- @@ -1844,6 +1935,12 @@ def default_values_from_type(self, val_type): """ + if isinstance(val_type, tuple): + if data_type in val_type: + val_type = data_type + else: + val_type = val_type[0] + # Perform some pre-checks on type, checks that could error with # unexpected input. try: @@ -1886,13 +1983,15 @@ def default_values_from_type(self, val_type): return default_val - def default_values_from_attr(self, attr_name): + def default_values_from_attr(self, attr_name, data_type=None): """Retrieve the default values for each label based on their type. Parameters ---------- attr_name : str Label attribute name (e.g., max_val) + data_type : type or NoneType + Type for the data values or None if not specified (default=None) Returns ------- @@ -1917,7 +2016,7 @@ def default_values_from_attr(self, attr_name): default_val = 'linear' else: default_val = self.default_values_from_type( - self.label_type[attr_name]) + self.label_type[attr_name], data_type=data_type) if default_val is None: mstr = ' '.join(('A problem may have been', 'encountered with the user', diff --git a/pysat/constellations/__init__.py b/pysat/constellations/__init__.py index e628df3f9..ec57f8a80 100644 --- a/pysat/constellations/__init__.py +++ b/pysat/constellations/__init__.py @@ -4,7 +4,7 @@ package. 
""" -__all__ = ['testing', 'testing_empty', 'single_test'] +__all__ = ['testing', 'testing_empty', 'testing_partial', 'single_test'] for const in __all__: exec("from pysat.constellations import {:}".format(const)) diff --git a/pysat/constellations/single_test.py b/pysat/constellations/single_test.py index eba1d063d..c080cfedd 100644 --- a/pysat/constellations/single_test.py +++ b/pysat/constellations/single_test.py @@ -10,4 +10,4 @@ import pysat instruments = [pysat.Instrument('pysat', 'testing', clean_level='clean', - update_files=True)] + num_samples=10, update_files=True)] diff --git a/pysat/constellations/testing.py b/pysat/constellations/testing.py index 46bddcccd..1b7917e80 100644 --- a/pysat/constellations/testing.py +++ b/pysat/constellations/testing.py @@ -5,16 +5,16 @@ instruments : list List of pysat.Instrument objects +Note +---- +Each instrument has a different sample size to test the common_index + """ import pysat instruments = [pysat.Instrument('pysat', 'testing', clean_level='clean', num_samples=10, use_header=True), - pysat.Instrument('pysat', 'testing2d', clean_level='clean', - use_header=True), pysat.Instrument('pysat', 'ndtesting', clean_level='clean', - use_header=True), - pysat.Instrument('pysat', 'testing_xarray', clean_level='clean', - use_header=True), + num_samples=16, use_header=True), pysat.Instrument('pysat', 'testmodel', clean_level='clean', - use_header=True)] + num_samples=18, use_header=True)] diff --git a/pysat/constellations/testing_partial.py b/pysat/constellations/testing_partial.py new file mode 100644 index 000000000..800e9850c --- /dev/null +++ b/pysat/constellations/testing_partial.py @@ -0,0 +1,14 @@ +"""Create a constellation where not all instruments have loadable data. + +Attributes +---------- +instruments : list + List of pysat.Instrument objects + +""" +import pysat + +instruments = [pysat.Instrument('pysat', 'testing', clean_level='clean', + num_samples=10, use_header=True), + pysat.Instrument('pysat', 'testing', tag='no_download', + clean_level='clean', use_header=True)] diff --git a/pysat/instruments/methods/general.py b/pysat/instruments/methods/general.py index 2a8c3dcd4..732163702 100644 --- a/pysat/instruments/methods/general.py +++ b/pysat/instruments/methods/general.py @@ -135,9 +135,11 @@ def list_files(tag='', inst_id='', data_path='', format_str=None, and out_month.year == emonth.year): out_month = emonth - mrange = pds.date_range(start=out_month, periods=2, freq='MS') - irange = pds.date_range(*mrange.values, freq="D").values[:-1] - new_out[irange] = out.loc[out_month] + crange = pds.date_range(start=out_month, periods=2, + freq=file_cadence) + irange = pds.date_range(*crange.values, freq="D").values[:-1] + sel_range = new_out.index.intersection(irange) + new_out[sel_range] = out.loc[out_month] # Assign the non-NaN files to out and add days to the filenames out = new_out.dropna() diff --git a/pysat/instruments/methods/testing.py b/pysat/instruments/methods/testing.py index 2f8e2b2f7..bf9cbeef1 100644 --- a/pysat/instruments/methods/testing.py +++ b/pysat/instruments/methods/testing.py @@ -100,7 +100,14 @@ def initialize_test_meta(epoch_name, data_keys): """ # Create standard metadata for all parameters - meta = pysat.Meta() + data_types = {'uts': float, 'mlt': float, 'slt': float, 'longitude': float, + 'latitude': float, 'altitude': float, 'orbit_num': int, + 'dummy1': int, 'dummy2': int, 'dummy3': int, 'dummy4': int, + 'unicode_dummy': str, 'string_dummy': str, + 'dummy_drifts': float, 'int8_dummy': int, 'int16_dummy': int, + 
'int32_dummy': int, 'int64_dummy': int, 'profiles': int, + 'series_profiles': float} + meta = pysat.Meta(data_types=data_types) meta['uts'] = {'units': 's', 'long_name': 'Universal Time', 'desc': 'Number of seconds since mindight UT', 'value_min': 0.0, 'value_max': 86400.0} @@ -171,6 +178,10 @@ def initialize_test_meta(epoch_name, data_keys): # Children metadata required for 2D pandas. # TODO(#789): Delete after removal of Meta children. + data_types = {'density': float, 'fraction': float, 'alt_profiles': float, + 'variable_profiles': float, 'profile_height': int, + 'variable_profile_height': int, 'images': int, 'x': int, + 'y': int, 'z': int, 'image_lat': float, 'image_lon': float} alt_profile_meta = pysat.Meta() alt_profile_meta['density'] = {'desc': 'Simulated density values.', 'units': 'Log N/cc', @@ -650,3 +661,65 @@ def create_files(inst, start, stop, freq='1D', use_doy=True, if timeout is not None: time.sleep(timeout) return + + +def non_monotonic_index(index): + """Adjust the index to be non-monotonic. + + Parameters + ---------- + index : pds.DatetimeIndex + The index generated in an instrument test file. + + Returns + ------- + new_index : pds.DatetimeIndex + A non-montonic index + + """ + + new_index = index.tolist() + + # Create a non-monotonic index + new_index[6:9], new_index[3:6] = new_index[3:6], new_index[6:9] + + # Convert back to DatetimeIndex + new_index = pds.to_datetime(new_index) + + return new_index + + +def non_unique_index(index): + """Adjust the index to be non-unique. + + Parameters + ---------- + index : pds.DatetimeIndex + The index generated in an instrument test file. + + Returns + ------- + new_index : pds.DatetimeIndex + A non-unique index + + """ + + new_index = index.tolist() + + # Create a non-unique index + new_index[1:3] = [new_index[1]] * 2 + + # Convert back to DatetimeIndex + new_index = pds.to_datetime(new_index) + + return new_index + + +def _warn_malformed_kwarg(): + """Warn user that kwarg has been deprecated.""" + + dstr = ' '.join(['The kwarg malformed_index has been deprecated and', + 'will be removed in pysat 3.2.0+. Please use', + 'non_monotonic_index or non_unique_index to specify', + 'desired behaviour.']) + warnings.warn(dstr, DeprecationWarning, stacklevel=2) diff --git a/pysat/instruments/pysat_ndtesting.py b/pysat/instruments/pysat_ndtesting.py index 67e7fad00..581f44446 100644 --- a/pysat/instruments/pysat_ndtesting.py +++ b/pysat/instruments/pysat_ndtesting.py @@ -30,9 +30,9 @@ preprocess = mm_test.preprocess -def load(fnames, tag='', inst_id='', malformed_index=False, - start_time=None, num_samples=864, test_load_kwarg=None, - max_latitude=90.): +def load(fnames, tag='', inst_id='', non_monotonic_index=False, + non_unique_index=False, malformed_index=False, start_time=None, + num_samples=864, test_load_kwarg=None, max_latitude=90.): """Load the test files. Parameters @@ -45,8 +45,13 @@ def load(fnames, tag='', inst_id='', malformed_index=False, inst_id : str Instrument ID used to identify particular data set to be loaded. This input is nominally provided by pysat itself. (default='') + non_monotonic_index : bool + If True, time index will be non-monotonic (default=False) + non_unique_index : bool + If True, time index will be non-unique (default=False) malformed_index : bool - If True, the time index will be non-unique and non-monotonic. + If True, the time index will be non-unique and non-monotonic. Deprecated + and scheduled for removal in pysat 3.2.0. 
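# -- Editor's illustrative sketch (not part of the patch) --------------------
# Quick demonstration of the two index helpers defined above, applied to a
# plain DatetimeIndex; the swap and duplication positions follow that code.
import pandas as pds
from pysat.instruments.methods import testing as mm_test

index = pds.date_range('2009-01-01', periods=10, freq='1S')

# Entries 3-5 and 6-8 are swapped, breaking monotonicity.
print(mm_test.non_monotonic_index(index).is_monotonic_increasing)

# Entry 1 is repeated at position 2, breaking uniqueness.
print(mm_test.non_unique_index(index).is_unique)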
(default=False) start_time : dt.timedelta or NoneType Offset time of start time since midnight UT. If None, instrument data @@ -81,15 +86,18 @@ def load(fnames, tag='', inst_id='', malformed_index=False, # Using 100s frequency for compatibility with seasonal analysis unit tests uts, index, dates = mm_test.generate_times(fnames, num_samples, freq='100S', start_time=start_time) - + # TODO(#1094): Remove in pysat 3.2.0 if malformed_index: - index = index.tolist() + # Warn that kwarg is deprecated and set new kwargs. + mm_test._warn_malformed_kwarg() + non_monotonic_index = True + non_unique_index = True - # Create a non-monotonic index - index[0:3], index[3:6] = index[3:6], index[0:3] + if non_monotonic_index: + index = mm_test.non_monotonic_index(index) + if non_unique_index: + index = mm_test.non_unique_index(index) - # Create a non-unique index - index[6:9] = [index[6]] * 3 data = xr.Dataset({'uts': ((epoch_name), uts)}, coords={epoch_name: index}) diff --git a/pysat/instruments/pysat_netcdf.py b/pysat/instruments/pysat_netcdf.py index 9fd9f4231..2f42de4a1 100644 --- a/pysat/instruments/pysat_netcdf.py +++ b/pysat/instruments/pysat_netcdf.py @@ -132,16 +132,9 @@ def download(date_array, tag, inst_id, data_path=None): def load(fnames, tag='', inst_id='', strict_meta=False, file_format='NETCDF4', epoch_name=None, epoch_unit='ms', epoch_origin='unix', - pandas_format=True, decode_timedelta=False, - load_labels={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'plot': ('plot_label', str), 'axis': ('axis', str), - 'scale': ('scale', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), - 'fill_val': ('fill', np.float64)}, - meta_processor=None, - meta_translation=None, drop_meta_labels=None, decode_times=None): + pandas_format=True, decode_timedelta=False, meta_kwargs=None, + load_labels=None, meta_processor=None, meta_translation=None, + drop_meta_labels=None, decode_times=None): """Load pysat-created NetCDF data and meta data. Parameters @@ -187,13 +180,13 @@ def load(fnames, tag='', inst_id='', strict_meta=False, file_format='NETCDF4', Used for xarray data (`pandas_format` is False). If True, variables with unit attributes that are 'timelike' ('hours', 'minutes', etc) are converted to `np.timedelta64`. (default=False) - load_labels : dict + meta_kwargs : dict or NoneType + Dict to specify custom Meta initialization or None to use Meta + defaults (default=None) + load_labels : dict or NoneType Dict where keys are the label attribute names and the values are tuples - that have the label values and value types in that order. - (default={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), 'fill_val': ('fill', np.float64)}) + that have the label values and value types in that order or None to use + Meta defaults. Deprecated, use `meta_kwargs` instead. 
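# -- Editor's illustrative sketch (not part of the patch) --------------------
# How the new `meta_kwargs` dict can replace the deprecated `load_labels`
# dict when reading a pysat-written netCDF file. 'example.nc' is a
# placeholder file name, and the labels shown mirror the new MetaLabels
# defaults from this changeset.
import pysat

meta_kwargs = {'labels': {'units': ('units', str),
                          'name': ('long_name', str),
                          'notes': ('notes', str),
                          'desc': ('desc', str),
                          'min_val': ('value_min', (float, int)),
                          'max_val': ('value_max', (float, int)),
                          'fill_val': ('fill', (float, int, str))}}
data, meta = pysat.utils.io.load_netcdf('example.nc', epoch_name='Epoch',
                                        meta_kwargs=meta_kwargs)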
(default=None) meta_processor : function or NoneType If not None, a dict containing all of the loaded metadata will be passed to `meta_processor` which should return a filtered version @@ -234,6 +227,7 @@ def load(fnames, tag='', inst_id='', strict_meta=False, file_format='NETCDF4', epoch_origin=epoch_origin, pandas_format=pandas_format, decode_timedelta=decode_timedelta, + meta_kwargs=meta_kwargs, labels=load_labels, meta_processor=meta_processor, meta_translation=meta_translation, diff --git a/pysat/instruments/pysat_testing.py b/pysat/instruments/pysat_testing.py index 7247db767..c96dccdfe 100644 --- a/pysat/instruments/pysat_testing.py +++ b/pysat/instruments/pysat_testing.py @@ -26,7 +26,7 @@ inst_ids = {'': [tag for tag in tags.keys()]} _test_dates = {'': {tag: dt.datetime(2009, 1, 1) for tag in tags.keys()}} _test_download = {'': {'no_download': False}} -_test_load_opt = {'': {'': {'num_samples': 13}}} +_test_load_opt = {'': {'': [{'num_samples': 13}, {'num_samples': 15}]}} # Init method init = mm_test.init @@ -39,9 +39,9 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, - sim_multi_file_left=False, root_date=None, malformed_index=False, - start_time=None, num_samples=86400, test_load_kwarg=None, - max_latitude=90.): + sim_multi_file_left=False, root_date=None, non_monotonic_index=False, + non_unique_index=False, malformed_index=False, start_time=None, + num_samples=86400, test_load_kwarg=None, max_latitude=90.): """Load the test files. Parameters @@ -63,8 +63,13 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, root_date : NoneType Optional central date, uses _test_dates if not specified. (default=None) + non_monotonic_index : bool + If True, time index will be non-monotonic (default=False) + non_unique_index : bool + If True, time index will be non-unique (default=False) malformed_index : bool - If True, time index will be non-unique and non-monotonic (default=False) + If True, the time index will be non-unique and non-monotonic. Deprecated + and scheduled for removal in pysat 3.2.0. (default=False) start_time : dt.timedelta or NoneType Offset time of start time since midnight UT. If None, instrument data will begin at midnight. (default=None) @@ -159,15 +164,20 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, data['int32_dummy'] = np.ones(len(data), dtype=np.int32) data['int64_dummy'] = np.ones(len(data), dtype=np.int64) - # Activate for testing malformed_index, and for instrument_test_class. - if malformed_index or tag == 'non_strict': - index = index.tolist() + # TODO(#1094): Remove in pysat 3.2.0 + if malformed_index: + # Warn that kwarg is deprecated and set new kwargs. + mm_test._warn_malformed_kwarg() + non_monotonic_index = True + non_unique_index = True - # Create a non-monotonic index - index[0:3], index[3:6] = index[3:6], index[0:3] + # Activate if non-monotonic index is needed. + if np.any([non_monotonic_index, (tag == 'non_strict')]): + index = mm_test.non_monotonic_index(index) - # Create a non-unique index - index[6:9] = [index[6]] * 3 + # Activate if non-unique index is needed. + if np.any([non_unique_index, (tag == 'non_strict')]): + index = mm_test.non_unique_index(index) data.index = index data.index.name = 'Epoch' @@ -175,8 +185,11 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, # Set the meta data meta = mm_test.initialize_test_meta('Epoch', data.keys()) + # TODO(#1120): Move logic up so that empty data is returned first. 
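# -- Editor's illustrative sketch (not part of the patch) --------------------
# The replacement keywords can be passed straight through as custom load
# kwargs on the test instruments; the deprecated `malformed_index` path above
# now emits a DeprecationWarning and simply turns both new options on.
import datetime as dt
import pysat

inst = pysat.Instrument('pysat', 'testing', non_monotonic_index=True,
                        use_header=True)
inst.load(date=dt.datetime(2009, 1, 1))
print(inst.index.is_monotonic_increasing)  # expected False with this kwarg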
if tag == 'default_meta': return data, pysat.Meta() + elif tag == 'no_download': + return pds.DataFrame(), pysat.Meta() else: return data, meta diff --git a/pysat/instruments/pysat_testing2d.py b/pysat/instruments/pysat_testing2d.py index 7e5eb0042..76819a6a4 100644 --- a/pysat/instruments/pysat_testing2d.py +++ b/pysat/instruments/pysat_testing2d.py @@ -152,13 +152,9 @@ def load(fnames, tag='', inst_id='', malformed_index=False, data['int64_dummy'] = np.ones(len(data), dtype=np.int64) if malformed_index: - index = index.tolist() - - # Create a non-monotonic index - index[0:3], index[3:6] = index[3:6], index[0:3] - - # Create a non-unique index - index[6:9] = [index[6]] * 3 + mm_test._warn_malformed_kwarg() + index = mm_test.non_monotonic_index(index) + index = mm_test.non_unique_index(index) data.index = index data.index.name = 'Epoch' @@ -180,8 +176,8 @@ def load(fnames, tag='', inst_id='', malformed_index=False, series_profiles = [] # Frame indexed by date times - frame = pds.DataFrame({'density': data.loc[data.index[0:num_profiles], - 'mlt'].values.copy(), + frame = pds.DataFrame({'density': + data.iloc[0:num_profiles]['mlt'].values.copy(), 'dummy_str': ['test'] * num_profiles, 'dummy_ustr': [u'test'] * num_profiles}, index=data.index[0:num_profiles], diff --git a/pysat/instruments/pysat_testing_xarray.py b/pysat/instruments/pysat_testing_xarray.py index 054330951..0edd7365b 100644 --- a/pysat/instruments/pysat_testing_xarray.py +++ b/pysat/instruments/pysat_testing_xarray.py @@ -61,9 +61,9 @@ def init(self, test_init_kwarg=None): def load(fnames, tag='', inst_id='', sim_multi_file_right=False, - sim_multi_file_left=False, malformed_index=False, - start_time=None, num_samples=86400, test_load_kwarg=None, - max_latitude=90.): + sim_multi_file_left=False, non_monotonic_index=False, + non_unique_index=False, malformed_index=False, start_time=None, + num_samples=86400, test_load_kwarg=None, max_latitude=90.): """Load the test files. Parameters @@ -82,8 +82,13 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, sim_multi_file_left : bool Adjusts date range to be 12 hours in the past or twelve hours before `root_date`. (default=False) + non_monotonic_index : bool + If True, time index will be non-monotonic (default=False) + non_unique_index : bool + If True, time index will be non-unique (default=False) malformed_index : bool - If True, time index will be non-unique and non-monotonic. + If True, the time index will be non-unique and non-monotonic. Deprecated + and scheduled for removal in pysat 3.2.0. (default=False) start_time : dt.timedelta or NoneType Offset time of start time since midnight UT. If None, instrument data will begin at midnight. (default=None) @@ -124,14 +129,17 @@ def load(fnames, tag='', inst_id='', sim_multi_file_right=False, else: root_date = dt.datetime(2009, 1, 1) + # TODO(#1094): Remove in pysat 3.2.0 if malformed_index: - index = index.tolist() - - # Create a non-monotonic index - index[0:3], index[3:6] = index[3:6], index[0:3] - - # Create a non-unique index - index[6:9] = [index[6]] * 3 + # Warn that kwarg is deprecated and set new kwargs. 
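# -- Editor's illustrative sketch (not part of the patch) --------------------
# The 'no_download' tag handled above now returns an empty DataFrame and
# default Meta from `load`, which is what the new `testing_partial`
# constellation relies on.
import datetime as dt
import pysat

empty = pysat.Instrument('pysat', 'testing', tag='no_download',
                         use_header=True)
empty.load(date=dt.datetime(2009, 1, 1))
print(empty.empty)  # expected True: the load returns no data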
+ mm_test._warn_malformed_kwarg() + non_monotonic_index = True + non_unique_index = True + + if non_monotonic_index: + index = mm_test.non_monotonic_index(index) + if non_unique_index: + index = mm_test.non_unique_index(index) data = xr.Dataset({'uts': ((epoch_name), uts)}, coords={epoch_name: index}) diff --git a/pysat/instruments/pysat_testmodel.py b/pysat/instruments/pysat_testmodel.py index bcf694dcb..f781bdcb1 100644 --- a/pysat/instruments/pysat_testmodel.py +++ b/pysat/instruments/pysat_testmodel.py @@ -164,8 +164,10 @@ def load(fnames, tag='', inst_id='', start_time=None, num_samples=96, # Adjust metadata from overall defaults meta['dummy1'] = {'value_min': -2**32 + 2, 'value_max': 2**32 - 1, 'fill': -2**32 + 1} - meta['dummy2'] = {'value_min': -2**32 + 2, 'value_max': 2**32 - 1, - 'fill': -2**32 + 1} + if tag == '': + # Assign metadata unique to default tag + meta['dummy2'] = {'value_min': -2**32 + 2, 'value_max': 2**32 - 1, + 'fill': -2**32 + 1} if tag == 'pressure_levels': # Assigning new metadata for altitude since it differs from default info @@ -180,14 +182,15 @@ def load(fnames, tag='', inst_id='', start_time=None, num_samples=96, # Assigning metadata for meridional ion drifts since it differs from # default info. - meta['iv_mer'] = {meta.labels.units: 'm/s', - meta.labels.name: 'Meridional Ion Drift', - meta.labels.min_val: -250., - meta.labels.max_val: 250., - meta.labels.desc: ' '.join(('Non-physical meridional', - 'ion drifts.')), - meta.labels.notes: '', - meta.labels.fill_val: np.nan} + meta['dummy_drifts'] = {meta.labels.units: 'm/s', + meta.labels.name: 'Meridional Ion Drift', + meta.labels.min_val: -250., + meta.labels.max_val: 250., + meta.labels.desc: ' '.join(('Non-physical', + 'meridional', + 'ion drifts.')), + meta.labels.notes: '', + meta.labels.fill_val: np.nan} # Assign metadata for the new coordinate axis here, `lev` and `ilev`. meta['lev'] = {meta.labels.units: '', diff --git a/pysat/instruments/templates/template_instrument.py b/pysat/instruments/templates/template_instrument.py index 54e45c434..4890fdf77 100644 --- a/pysat/instruments/templates/template_instrument.py +++ b/pysat/instruments/templates/template_instrument.py @@ -377,7 +377,7 @@ def list_remote_files(tag, inst_id, user=None, password=None): a particular NASA CDAWeb dataset. Parameters - ----------- + ---------- tag : str Denotes type of file to load. Accepted types are . 
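# -- Editor's illustrative sketch (not part of the patch) --------------------
# Assigning per-variable metadata through the label attributes, mirroring the
# renamed 'dummy_drifts' entry above on a standalone Meta object.
import numpy as np
import pysat

meta = pysat.Meta()
meta['dummy_drifts'] = {meta.labels.units: 'm/s',
                        meta.labels.name: 'Meridional Ion Drift',
                        meta.labels.min_val: -250.0,
                        meta.labels.max_val: 250.0,
                        meta.labels.notes: '',
                        meta.labels.fill_val: np.nan}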
inst_id : str diff --git a/pysat/tests/classes/cls_instrument_access.py b/pysat/tests/classes/cls_instrument_access.py index f162ca46e..edc8b759a 100644 --- a/pysat/tests/classes/cls_instrument_access.py +++ b/pysat/tests/classes/cls_instrument_access.py @@ -26,6 +26,7 @@ import pysat from pysat.utils import testing +from pysat.utils.time import filter_datetime_input class InstAccessTests(object): @@ -93,7 +94,8 @@ def eval_successful_load(self, end_date=None): """ # Test that the first loaded time matches the first requested time assert self.testInst.index[0] == self.ref_time, \ - "First loaded time is incorrect" + "First loaded time is incorrect {:} != {:}".format( + self.testInst.index[0], self.ref_time) # Test that the Instrument date is set to the requested start date self.out = dt.datetime(self.ref_time.year, self.ref_time.month, @@ -130,8 +132,16 @@ def test_basic_instrument_load(self, kwargs): self.eval_successful_load() return - def test_basic_instrument_load_no_data(self, caplog): - """Test Instrument load with no data for appropriate log messages.""" + @pytest.mark.parametrize('pad', [None, dt.timedelta(days=1)]) + def test_basic_instrument_load_no_data(self, caplog, pad): + """Test Instrument load with no data for appropriate log messages. + + Parameters + ---------- + pad : dt.timedelta, pds.DateOffset, NoneType + Pad input for load call + + """ # Get a date that is not covered by an Instrument object. no_data_d = self.testInst.files.files.index[0] - dt.timedelta(weeks=10) @@ -142,7 +152,7 @@ def test_basic_instrument_load_no_data(self, caplog): # Test doesn't check against loading by filename since that produces # an error if there is no file. Loading by yr, doy no different # than date in this case. - self.testInst.load(date=no_data_d, use_header=True) + self.testInst.load(date=no_data_d, pad=pad, use_header=True) # Confirm by checking against caplog that metadata was # not assigned. @@ -346,31 +356,34 @@ def test_basic_instrument_load_data(self): def test_basic_instrument_load_leap_year(self): """Test if the correct day is being loaded (Leap-Year).""" + if self.check_nonstandard_cadence(): + pytest.skip("Test only makes sense for daily cadence") + self.ref_time = dt.datetime(2008, 12, 31) self.ref_doy = 366 self.testInst.load(self.ref_time.year, self.ref_doy, use_header=True) self.eval_successful_load() return - @pytest.mark.parametrize("operator,ref_time", - [('next', dt.datetime(2008, 1, 1)), - ('prev', dt.datetime(2010, 12, 31))]) - def test_file_load_default(self, operator, ref_time): + @pytest.mark.parametrize("operator,ref_ind", + [('next', 0), ('prev', -1)]) + def test_file_load_default(self, operator, ref_ind): """Test if correct day loads by default when first invoking iteration. Parameters ---------- operator : str Name of iterator to use. - ref_time : dt.datetime - Expected date to load when iteration is first invoked. + ref_time : int + Expected index to load when iteration is first invoked. """ getattr(self.testInst, operator)() # Modify ref time since iterator changes load date. - self.ref_time = ref_time + self.ref_time = filter_datetime_input( + self.testInst.files.files.index[ref_ind]) self.eval_successful_load() return @@ -385,10 +398,10 @@ def test_file_load_bad_start_file(self, operator): """ - self.testInst.load(fname=self.testInst.files[12], use_header=True) + self.testInst.load(fname=self.testInst.files[1], use_header=True) # Set new bounds that do not include this date. 
- self.testInst.bounds = (self.testInst.files[9], self.testInst.files[20], + self.testInst.bounds = (self.testInst.files[0], self.testInst.files[2], 2, 1) testing.eval_bad_input(getattr(self.testInst, operator), StopIteration, 'Unable to find loaded filename ') @@ -426,9 +439,7 @@ def test_basic_fname_instrument_load(self): self.eval_successful_load() return - @pytest.mark.parametrize("operator,direction", - [('next', 1), - ('prev', -1)]) + @pytest.mark.parametrize("operator,direction", [('next', 1), ('prev', -1)]) def test_fname_load_default(self, operator, direction): """Test correct day loads when moving by day, starting with `fname`. @@ -448,7 +459,9 @@ def test_fname_load_default(self, operator, direction): getattr(self.testInst, operator)() # Modify ref time since iterator changes load date. - self.ref_time = self.ref_time + direction * dt.timedelta(days=1) + foff = pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) + self.ref_time = self.ref_time + direction * foff self.eval_successful_load() return @@ -463,19 +476,23 @@ def test_filename_load(self): def test_filenames_load(self): """Test if files are loadable by filename range.""" - stop_fname = self.ref_time + dt.timedelta(days=1) + foff = pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) + stop_fname = self.ref_time + foff stop_fname = stop_fname.strftime('%Y-%m-%d.nofile') self.testInst.load(fname=self.ref_time.strftime('%Y-%m-%d.nofile'), stop_fname=stop_fname, use_header=True) assert self.testInst.index[0] == self.ref_time - assert self.testInst.index[-1] >= self.ref_time + dt.timedelta(days=1) - assert self.testInst.index[-1] <= self.ref_time + dt.timedelta(days=2) + assert self.testInst.index[-1] >= self.ref_time + foff + assert self.testInst.index[-1] <= self.ref_time + (2 * foff) return def test_filenames_load_out_of_order(self): """Test error raised if fnames out of temporal order.""" - stop_fname = self.ref_time + dt.timedelta(days=1) + foff = pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) + stop_fname = self.ref_time + foff stop_fname = stop_fname.strftime('%Y-%m-%d.nofile') check_fname = self.ref_time.strftime('%Y-%m-%d.nofile') estr = '`stop_fname` must occur at a later date ' @@ -573,8 +590,10 @@ def test_concat_data(self, prepend, sort_dim_toggle): """ # Load a data set to concatonate - self.testInst.load(self.ref_time.year, self.ref_doy + 1, - use_header=True) + ref_time2 = self.ref_time + pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) + doy2 = int(ref_time2.strftime('%j')) + self.testInst.load(ref_time2.year, doy2, use_header=True) data2 = self.testInst.data len2 = len(self.testInst.index) @@ -696,11 +715,24 @@ def test_data_access_by_indices_and_name(self, index): == self.testInst.data['mlt'][index]) return + def test_data_access_by_row_slicing(self): + """Check that each variable is downsampled.""" + + self.testInst.load(self.ref_time.year, self.ref_doy, use_header=True) + result = self.testInst[0:10] + for variable, array in result.items(): + assert len(array) == len(self.testInst.data[variable].values[0:10]) + assert np.all(array == self.testInst.data[variable].values[0:10]) + return + def test_data_access_by_row_slicing_and_name_slicing(self): """Check that each variable is downsampled.""" + if not self.testInst.pandas_format: + pytest.skip("name slicing not implemented for xarray") + self.testInst.load(self.ref_time.year, self.ref_doy, use_header=True) - result = self.testInst[0:10, :] + 
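# -- Editor's illustrative sketch (not part of the patch) --------------------
# The access patterns exercised by the two tests above, shown on a
# pandas-format test Instrument; name slicing is skipped for xarray data.
import datetime as dt
import pysat

inst = pysat.Instrument('pysat', 'testing', num_samples=100, use_header=True)
inst.load(date=dt.datetime(2009, 1, 1))

rows = inst[0:10]              # row slice across every variable
sub = inst[0:10, 'uts':'mlt']  # row slice combined with a name slice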
result = self.testInst[0:10, 'uts':'mlt'] for variable, array in result.items(): assert len(array) == len(self.testInst.data[variable].values[0:10]) assert np.all(array == self.testInst.data[variable].values[0:10]) @@ -766,6 +798,7 @@ def test_setting_data_by_name_single_element(self): self.testInst.load(self.ref_time.year, self.ref_doy, use_header=True) self.testInst['doubleMLT'] = 2. assert np.all(self.testInst['doubleMLT'] == 2.) + assert len(self.testInst['doubleMLT']) == len(self.testInst.index) self.testInst['nanMLT'] = np.nan assert np.all(np.isnan(self.testInst['nanMLT'])) diff --git a/pysat/tests/classes/cls_instrument_integration.py b/pysat/tests/classes/cls_instrument_integration.py index 9cddab67d..e76a0f7e8 100644 --- a/pysat/tests/classes/cls_instrument_integration.py +++ b/pysat/tests/classes/cls_instrument_integration.py @@ -37,6 +37,9 @@ class InstIntegrationTests(object): def test_no_stale_data_paths(self, caplog): """Ensure stale data paths aren't retained by pysat.Instrument.files.""" + if 'file_date_range' in self.testInst.kwargs['list_files']: + pytest.skip("Cannot run eval on pds.DatetimeIndex") + inst_str = repr(self.testInst) inst_str = inst_str.replace('update_files=True', 'update_files=False') self.testInst = eval(inst_str) diff --git a/pysat/tests/classes/cls_instrument_iteration.py b/pysat/tests/classes/cls_instrument_iteration.py index 7129367d1..2a960f758 100644 --- a/pysat/tests/classes/cls_instrument_iteration.py +++ b/pysat/tests/classes/cls_instrument_iteration.py @@ -49,7 +49,35 @@ def generate_fname(self, date): fname = '{year:04d}-{month:02d}-{day:02d}.nofile' return fname.format(year=date.year, month=date.month, day=date.day) - def eval_iter_list(self, start, stop, dates=False, freq=None): + def get_fnames_times(self, inds=None): + """Get file names and times (date only) by index. + + Parameters + ---------- + inds : list or NoneType + List of indices to return filename and file time values + + Returns + ------- + fnames : list + List of filenames + ftimes : list + List of datetimes corresponding to the files + + """ + + fnames = list() + ftimes = list() + + if inds is not None: + for i in inds: + fnames.append(self.testInst.files.files[i]) + ftimes.append(filter_datetime_input(pds.to_datetime( + self.testInst.files.files.index[i]).to_pydatetime())) + + return fnames, ftimes + + def eval_iter_list(self, start, stop, freq, dates=False): """Evaluate successful generation of iter_list for `self.testInst`. Parameters @@ -58,15 +86,15 @@ def eval_iter_list(self, start, stop, dates=False, freq=None): Start date for generating iter_list. stop : dt.datetime or list of dt.datetime start date for generating iter_list. + freq : str + Frequency string, following pandas conventions dates : bool If True, checks each date. If False, checks against the _iter_list (default=False) - freq : int or NoneType - Frequency in days. If None, use pandas default. 
(default=None) """ - - kwargs = {'freq': '{:}D'.format(freq)} if freq else {} + # Set the frequency + kwargs = {'freq': freq} if isinstance(start, dt.datetime): out = pds.date_range(start, stop, **kwargs).tolist() @@ -74,14 +102,15 @@ def eval_iter_list(self, start, stop, dates=False, freq=None): out = list() for (istart, istop) in zip(start, stop): out.extend(pds.date_range(istart, istop, **kwargs).tolist()) + if dates: - dates = [] - for inst in self.testInst: - dates.append(inst.date) - pysat.utils.testing.assert_lists_equal(dates, out) + file_dates = [filter_datetime_input(ftime) + for ftime in self.testInst.files.files.index] + dates = [inst.date for inst in self.testInst + if inst.date in file_dates] + testing.assert_lists_equal(dates, out) else: - pysat.utils.testing.assert_lists_equal(self.testInst._iter_list, - out) + testing.assert_lists_equal(self.testInst._iter_list, out) return def support_iter_evaluations(self, starts, stops, step, width, @@ -94,12 +123,12 @@ def support_iter_evaluations(self, starts, stops, step, width, The start date for iterations, or dates for iteration over multiple segments. stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple + The end date for iterations, or dates for iteration over multiple segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. + step : int or str + The step size for the iteration bounds. If int, days are assumed. + width : int or str + The width of the iteration bounds. If int, days are assumed. for_loop : bool If True, iterate via for loop. If False, iterate via while. (default=False) @@ -118,8 +147,13 @@ def support_iter_evaluations(self, starts, stops, step, width, if by_date: # Convert step and width to string and timedelta. - step = '{:}D'.format(step) - width = dt.timedelta(days=width) + if type(step) in [int, np.int32, np.int64]: + step = '{:}D'.format(step) + if type(width) in [int, np.int32, np.int64]: + width = dt.timedelta(days=width) + else: + width = pds.tseries.frequencies.to_offset(width) + self.testInst.bounds = (starts, stops, step, width) else: # Convert start and stop to filenames. @@ -127,21 +161,36 @@ def support_iter_evaluations(self, starts, stops, step, width, stop_files = [self.generate_fname(date) for date in stops] self.testInst.bounds = (start_files, stop_files, step, width) + # Convert step and width for future use + if type(step) in [int, np.int32, np.int64]: + step = '{:}D'.format(step) + + if type(width) in [int, np.int32, np.int64]: + wstr = '{:d}{:s}'.format( + width, self.testInst.files.files.index.freqstr) + width = pds.tseries.frequencies.to_offset(wstr) + # Iterate until we run out of bounds + file_dates = [filter_datetime_input(ftime) + for ftime in self.testInst.files.files.index] dates = [] time_range = [] if for_loop: # Iterate via for loop option for inst in self.testInst: - dates.append(inst.date) - time_range.append((inst.index[0], - inst.index[-1])) + if inst.date in file_dates: + dates.append(inst.date) + if len(inst.index) > 0: + time_range.append((inst.index[0], inst.index[-1])) + else: + time_range.append(()) else: # Iterate forwards or backwards using `.next()` or `.prev()` if reverse: iterator = self.testInst.prev else: iterator = self.testInst.next + try: while True: iterator() @@ -154,30 +203,27 @@ def support_iter_evaluations(self, starts, stops, step, width, # Deal with file or date iteration, make file inputs same as date for # verification purposes. 
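# -- Editor's illustrative sketch (not part of the patch) --------------------
# The pattern used throughout this refactor: derive step and width from the
# file index frequency instead of hard-coding a daily cadence. The 'MS'
# value is only an example stand-in for `inst.files.files.index.freqstr`.
import pandas as pds

freqstr = 'MS'                             # e.g. a monthly file cadence
step = '2{:s}'.format(freqstr)             # two file periods per step
width = pds.tseries.frequencies.to_offset(step)
print(step, width)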
- if isinstance(step, int): - step = str(step) + 'D' - if isinstance(width, int): - width = dt.timedelta(days=width) - out = [] + foff = pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) for start, stop in zip(starts, stops): - tdate = stop - width + dt.timedelta(days=1) - out.extend(pds.date_range(start, tdate, freq=step).tolist()) + if start in file_dates: + tdate = stop - width + foff + out.extend(pds.date_range(start, tdate, freq=step).tolist()) + if reverse: # Ensure time order is consistent for verify methods. out = out[::-1] - pysat.utils.testing.assert_lists_equal(dates, out) - - output = {} - output['expected_times'] = out - output['observed_times'] = time_range - output['starts'] = starts - output['stops'] = stops - output['width'] = width - output['step'] = step + testing.assert_lists_equal(dates, out) + + # Assign the output + output = {'expected_times': out, 'observed_times': time_range, + 'starts': starts, 'stops': stops, 'width': width, + 'step': step} + return output - def verify_iteration(self, out, reverse=False, inclusive=True): + def verify_iteration(self, out, reverse=False): """Verify loaded dates for iteration, forward or backward. Parameters @@ -185,15 +231,17 @@ def verify_iteration(self, out, reverse=False, inclusive=True): reverse : bool If True, use move backwards through the list. If False, move forwards. (default=False) - inclusive : bool - If True, check that end of bounds is included in iterated dates. - If False, check that end of bounds is excluded from iterated dates. - (default=True) """ - # Inclusive checks require shifting some expected dates by 1. - delta_inc = dt.timedelta(days=1) if inclusive else dt.timedelta(days=0) + # Inclusive checks require shifting some expected dates + check_inc = pds.tseries.frequencies.to_offset( + self.testInst.files.files.index.freqstr) + + # Arithmetic operations must be performed on datetime objects, + # not timedelta or DateOffset objects. + delta_inc = pds.tseries.frequencies.to_offset( + out['width']) + out['starts'][0] + check_inc - out['starts'][0] # Verify range of loaded data for each iteration step. for i, trange in enumerate(out['observed_times']): @@ -204,43 +252,51 @@ def verify_iteration(self, out, reverse=False, inclusive=True): # Check that loaded range is correct. assert trange[0] == out['expected_times'][i], \ - "Loaded start time is not correct" + "Loaded start time is not correct: {:} != {:}".format( + trange[0], out['expected_times'][i]) - check = out['expected_times'][i] + out['width'] - check -= dt.timedelta(days=1) - assert trange[1] > check, "End time lower than expected" + check = out['expected_times'][i] + out['width'] - check_inc + assert trange[1] > check, \ + "End time lower than expected: {:} <= {:}".format( + trange[1], check) check = out['stops'][b_range] + delta_inc - assert trange[1] < check, "End time higher than expected" + assert trange[1] < check, \ + "End time higher than expected {:} >= {:}".format( + trange[1], check) + + end_of_range = out['stops'][b_range] + dt.timedelta(days=1) + assert trange[1] < end_of_range, "End time higher than expected" if reverse: - end_of_range = out['stops'][b_range] + dt.timedelta(days=1) - assert trange[1] < end_of_range, "End time higher than expected" if i == 0: # Check that first load is before end of bounds. 
- check = out['stops'][b_range] - out['width'] - check += dt.timedelta(days=1) - - if inclusive: - assert trange[0] == check, \ - "Incorrect start time" - assert trange[1] > out['stops'][b_range], \ - "Stop time lower than expected" - else: - assert trange[0] < check, \ - "Start time higher than expected" + check = out['stops'][b_range] - out['width'] + check_inc + + assert trange[0] <= check, \ + "Start time is too high: {:} >= {:}: {:}".format( + trange[0], check, out) + + tdate = filter_datetime_input(trange[1]) + assert tdate <= out['stops'][b_range], \ + "Stop time higher than expected: {:} > {:}".format( + tdate, out['stops'][b_range]) check = out['stops'][b_range] + delta_inc assert trange[1] < check, \ - "Stop time higher than expected" + "Stop time higher than expected: {:} >= {:}".format( + trange[1], check) elif i == (len(out['observed_times']) - 1): # Check that last load is at start of bounds. assert trange[0] == out['starts'][b_range], \ - "Loaded start time is not correct" + "Loaded start time is wrong: {:} != {:}".format( + trange[0], out['starts'][b_range]) assert trange[1] > out['starts'][b_range], \ - "End time lower than expected" + "End time lower than expected: {:} <= {:}".format( + trange[1], out['starts'][b_range]) assert trange[1] < out['starts'][b_range] + out['width'], \ - "End time higher than expected" + "End time higher than expected: {:} <= {:}".format( + trange[1], out['starts'][b_range]) return @@ -255,8 +311,9 @@ def test_file_load_empty_iteration(self, operator): """ - self.testInst.bounds = (None, None, '10000D', - dt.timedelta(days=10000)) + start_time = self.testInst.files.files.index[-1] + dt.timedelta(days=1) + end_time = start_time + dt.timedelta(days=1) + self.testInst.bounds = (start_time, end_time) testing.eval_bad_input(getattr(self.testInst, operator), StopIteration, 'File list is empty. 
') @@ -287,9 +344,9 @@ def test_set_bounds_with_frequency(self): start = self.ref_time stop = self.ref_time + dt.timedelta(days=14) - self.testInst.bounds = (start, stop, 'M') + self.testInst.bounds = (start, stop, 'MS') assert np.all(self.testInst._iter_list - == pds.date_range(start, stop, freq='M').tolist()) + == pds.date_range(start, stop, freq='MS').tolist()) return def test_iterate_bounds_with_frequency(self): @@ -297,8 +354,9 @@ def test_iterate_bounds_with_frequency(self): start = self.ref_time stop = self.ref_time + dt.timedelta(days=15) - self.testInst.bounds = (start, stop, '2D') - self.eval_iter_list(start, stop, dates=True, freq=2) + freq = '2{:s}'.format(self.testInst.files.files.index.freqstr) + self.testInst.bounds = (start, stop, freq) + self.eval_iter_list(start, stop, freq, dates=True) return def test_set_bounds_with_frequency_and_width(self): @@ -307,234 +365,117 @@ def test_set_bounds_with_frequency_and_width(self): start = self.ref_time stop = self.ref_time + pds.DateOffset(months=11, days=25) stop = stop.to_pydatetime() - self.testInst.bounds = (start, stop, '10D', dt.timedelta(days=10)) - assert np.all(self.testInst._iter_list - == pds.date_range(start, stop, freq='10D').tolist()) + freq = '2{:s}'.format(self.testInst.files.files.index.freqstr) + self.testInst.bounds = (start, stop, freq, + pds.tseries.frequencies.to_offset(freq)) + testing.assert_lists_equal(self.testInst._iter_list, + pds.date_range(start, stop, + freq=freq).tolist()) return - # TODO(#863): Remove hardwired dates and streamline here and below - # TODO(#902): Combine inclusive and exclusive tests via parametrize - @pytest.mark.parametrize( - "starts,stops,step,width", - [(dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 3), 2, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 4), 2, 3), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 5), 3, 1), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 17), 5, 1)]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_bounds_with_frequency_and_width(self, starts, stops, step, - width, by_date): - """Test iterate via date with mixed step/width, excludes stop date. + def test_iterate_index_error(self): + """Test iterate raises IndexError when there are no dates to iterate.""" - Parameters - ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. - by_date : bool - If True, iterate by date. If False, iterate by filename. 
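# -- Editor's illustrative sketch (not part of the patch) --------------------
# Setting bounds with a frequency-string step and a DateOffset width, as the
# rewritten tests above do. The dates are placeholders, and the test
# instrument's file index is assumed to carry a regular frequency (freqstr).
import datetime as dt
import pandas as pds
import pysat

inst = pysat.Instrument('pysat', 'testing', use_header=True)
freq = '2{:s}'.format(inst.files.files.index.freqstr)
inst.bounds = (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 15), freq,
               pds.tseries.frequencies.to_offset(freq))
print(inst.bounds)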
- - """ - - out = self.support_iter_evaluations(starts, stops, step, width, - for_loop=True, - by_date=by_date) - self.verify_iteration(out, reverse=False, inclusive=False) + _, ftimes = self.get_fnames_times(inds=[0, 2]) + step = '1{:s}'.format(self.testInst.files.files.index.freqstr) + width = '4{:s}'.format(self.testInst.files.files.index.freqstr) + input_args = [*ftimes, step, width] + input_kwargs = {'for_loop': True, 'by_date': True} + testing.eval_bad_input(self.support_iter_evaluations, IndexError, + "No dates to iterate over", input_args, + input_kwargs) return - @pytest.mark.parametrize( - "starts,stops,step,width", - [(dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 4), 2, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 4), 3, 1), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 4), 1, 4), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 5), 4, 1), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 5), 2, 3), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 5), 3, 2)]) + @pytest.mark.parametrize("file_inds", [([0, 2]), ([0, 3]), ([0, 4])]) + @pytest.mark.parametrize("step,width", [(2, 2), (2, 3), (3, 1)]) @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_bounds_with_frequency_and_width_incl(self, starts, stops, - step, width, by_date): - """Test iterate via date with mixed step/width, includes stop date. + @pytest.mark.parametrize("reverse,for_loop", + [(True, False), (False, False), (False, True)]) + def test_iterate_bounds_with_frequency_and_width(self, file_inds, step, + width, by_date, + reverse, for_loop): + """Test iterate via date with mixed step/width. Parameters ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. + file_inds : list + List of indices for the start and stop file times step : int The step size for the iteration bounds. width : int The width of the iteration bounds. by_date : bool If True, iterate by date. If False, iterate by filename. - - """ - - out = self.support_iter_evaluations(starts, stops, step, width, - for_loop=True, by_date=by_date) - self.verify_iteration(out, reverse=False, inclusive=True) - - return - - @pytest.mark.parametrize( - "starts,stops,step,width", - [(dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10), 2, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 9), 4, 1), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 11), 1, 3), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 11), 1, 11)]) - @pytest.mark.parametrize("reverse", [True, False]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_with_frequency_and_width_incl(self, starts, stops, step, - width, reverse, by_date): - """Test iteration via date step/width >1, includes stop date. - - Parameters - ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. reverse : bool - If True, iterate backwards. If False, iterate forwards. - by_date : bool - If True, iterate by date. If False, iterate by filename. + If True, iterate backwards. If False, iterate forwards. Only used + when `for_loop=False`. 
+ for_loop : bool + If True, iterate via for loop. If False, iterate via while. """ + # Get the desired file times + _, ftimes = self.get_fnames_times(inds=file_inds) - out = self.support_iter_evaluations(starts, stops, step, width, - reverse=reverse, by_date=by_date) - self.verify_iteration(out, reverse=reverse, inclusive=True) - - return - - @pytest.mark.parametrize( - "starts,stops,step,width", - [(dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 11), 2, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 12), 2, 3), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 13), 3, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 3), 4, 2), - (dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 12), 2, 1)]) - @pytest.mark.parametrize("reverse", [True, False]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_with_frequency_and_width(self, starts, stops, step, width, - reverse, by_date): - """Test iteration with step and width excluding stop date. - - Parameters - ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. - reverse : bool - If True, iterate backwards. If False, iterate forwards. - by_date : bool - If True, iterate by date. If False, iterate by filename. - - """ + # Convert integer steps/widths to strings, allowing multiple freq types + step = '{:d}{:s}'.format(step, self.testInst.files.files.index.freqstr) + if by_date: + width = '{:d}{:s}'.format(width, + self.testInst.files.files.index.freqstr) - out = self.support_iter_evaluations(starts, stops, step, width, - reverse=reverse, by_date=by_date) - self.verify_iteration(out, reverse=reverse, inclusive=False) + # Evaluate and verify the iterations + out = self.support_iter_evaluations(*ftimes, step, width, + for_loop=for_loop, by_date=by_date, + reverse=reverse) + self.verify_iteration(out, reverse=reverse) return - @pytest.mark.parametrize( - "starts,stops,step,width", - [([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 4), dt.datetime(2009, 1, 13)], 2, 2), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 7), dt.datetime(2009, 1, 16)], 3, 1), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 6), dt.datetime(2009, 1, 15)], 2, 4)]) - @pytest.mark.parametrize("reverse", [True, False]) + @pytest.mark.parametrize("start_inds,stop_inds", + [([0], [2]), ([0, 3], [1, 4])]) + @pytest.mark.parametrize("step,width", [(2, 2), (5, 1), (3, 2)]) @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_season_frequency_and_width_incl(self, starts, stops, step, - width, reverse, by_date): - """Test iteration via date season step/width > 1, include stop date. + @pytest.mark.parametrize("reverse,for_loop", + [(True, False), (False, False), (False, True)]) + def test_iterate_seasonal_bounds_with_frequency_and_width( + self, start_inds, stop_inds, step, width, by_date, reverse, + for_loop): + """Test iterate via date with mixed step/width. Parameters ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. 
+ start_inds : list + The index(es) corresponding to the start file(s) + stop_inds : list + The index(es) corresponding to the stop file(s) step : int The step size for the iteration bounds. width : int The width of the iteration bounds. - reverse : bool - If True, iterate backwards. If False, iterate forwards. by_date : bool If True, iterate by date. If False, iterate by filename. - - """ - - out = self.support_iter_evaluations(starts, stops, step, width, - reverse=reverse, by_date=by_date) - self.verify_iteration(out, reverse=reverse, inclusive=True) - - return - - @pytest.mark.parametrize( - "starts,stops,step,width", - [([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 3), dt.datetime(2009, 1, 12)], 2, 2), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 6), dt.datetime(2009, 1, 15)], 3, 1), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 7), dt.datetime(2009, 1, 16)], 2, 4)]) - @pytest.mark.parametrize("reverse", [True, False]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_season_frequency_and_width(self, starts, stops, step, - width, reverse, by_date): - """Test iteration via date season step/width>1, exclude stop date. - - Parameters - ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. reverse : bool - If True, iterate backwards. If False, iterate forwards. - by_date : bool - If True, iterate by date. If False, iterate by filename. + If True, iterate backwards. If False, iterate forwards. Only used + when `for_loop=False`. + for_loop : bool + If True, iterate via for loop. If False, iterate via while. """ + # Get the desired file times + _, start_times = self.get_fnames_times(inds=start_inds) + _, stop_times = self.get_fnames_times(inds=stop_inds) - out = self.support_iter_evaluations(starts, stops, step, width, - reverse=reverse, by_date=by_date) - self.verify_iteration(out, reverse=reverse, inclusive=False) + # Convert integer steps/widths to strings, allowing multiple freq types + step = '{:d}{:s}'.format(step, self.testInst.files.files.index.freqstr) + if by_date: + width = '{:d}{:s}'.format(width, + self.testInst.files.files.index.freqstr) + + # Evaluate and verify the iterations + out = self.support_iter_evaluations(start_times, stop_times, step, + width, for_loop=for_loop, + by_date=by_date, reverse=reverse) + self.verify_iteration(out, reverse=reverse) return @@ -554,7 +495,8 @@ def test_iterate_season_frequency_and_width(self, starts, stops, step, ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 1), '1D', dt.timedelta(days=1), False], - 'Too many input arguments.')]) + 'Too many input arguments.'), + ([1.0, 1.0], 'Input is not a known type')]) def test_set_bounds_error_message(self, new_bounds, errmsg): """Test ValueError when setting bounds with wrong inputs. @@ -567,276 +509,254 @@ def test_set_bounds_error_message(self, new_bounds, errmsg): in new_bounds. 
""" - + # Use pytest evaluation, as properties do not act like functions with pytest.raises(ValueError) as verr: self.testInst.bounds = new_bounds + assert str(verr).find(errmsg) >= 0 return - def test_set_bounds_string_default_start(self): - """Test set bounds with default start.""" + @pytest.mark.parametrize("type_ind", [(0), (1)]) + def test_set_bounds_default_start(self, type_ind): + """Test set bounds with default start. - self.testInst.bounds = [None, '2009-01-01.nofile'] - assert self.testInst.bounds[0][0] == self.testInst.files[0] - return + Parameters + ---------- + type_ind : int + Index where 0 takes the file names and 1 takes the file times - def test_set_bounds_string_default_stop(self): - """Test set bounds with default stop.""" + """ - self.testInst.bounds = ['2009-01-01.nofile', None] - assert self.testInst.bounds[1][0] == self.testInst.files[-1] - return + # Get the desired ending time and file + fvals = self.get_fnames_times([1]) - def test_set_bounds_by_default_dates(self): - """Verify bounds behavior with default date related inputs.""" + # Set the bounds with either the time or the value + self.testInst.bounds = [None, fvals[type_ind][0]] - start = self.testInst.files.start_date - stop = self.testInst.files.stop_date - self.testInst.bounds = (None, None) - self.eval_iter_list(start, stop) - self.testInst.bounds = None - self.eval_iter_list(start, stop) - self.testInst.bounds = (start, None) - self.eval_iter_list(start, stop) - self.testInst.bounds = (None, stop) - self.eval_iter_list(start, stop) + # Get the first time or file and check the bounds iteration type + if type_ind == 0: + assert self.testInst._iter_type == "file", "Not iterating by file" + check = self.testInst.files[0] + else: + assert self.testInst._iter_type == "date", "Not iterating by date" + check = filter_datetime_input(self.testInst.files.files.index[0]) + + # Evaluate the bounds starting value + assert self.testInst.bounds[0][0] == check, "Unexpected starting bound" return - @pytest.mark.parametrize("start,stop", [(dt.datetime(2009, 1, 1), - dt.datetime(2009, 1, 15)), - ([dt.datetime(2009, 1, 1), - dt.datetime(2009, 2, 1)], - [dt.datetime(2009, 1, 15), - dt.datetime(2009, 2, 15)])]) - def test_set_bounds_by_date(self, start, stop): - """Test setting bounds with datetimes over simple range and season. + @pytest.mark.parametrize("type_ind", [(0), (1)]) + def test_set_bounds_default_stop(self, type_ind): + """Test set bounds with default stop. Parameters ---------- - start : dt.datetime or list of dt.datetime - The start of the new bounds. - stop : dt.datetime or list of dt.datetime - The stop of the new bounds. 
+ type_ind : int + Index where 0 takes the file names and 1 takes the file times """ + # Get the desired ending time and file + fvals = self.get_fnames_times([1]) - self.testInst.bounds = (start, stop) - self.eval_iter_list(start, stop) + # Set the bounds with either the time or the value + self.testInst.bounds = [fvals[type_ind][0], None] + + # Get the first time or file and check the bounds iteration type + if type_ind == 0: + assert self.testInst._iter_type == "file", "Not iterating by file" + check = self.testInst.files[-1] + else: + assert self.testInst._iter_type == "date", "Not iterating by date" + check = filter_datetime_input(self.testInst.files.files.index[-1]) + + assert self.testInst.bounds[1][0] == check, "Unexpeding ending bound" return - @pytest.mark.parametrize("start,stop", [(dt.datetime(2009, 1, 15), - dt.datetime(2009, 1, 1)), - ([dt.datetime(2009, 1, 1), - dt.datetime(2009, 2, 1)], - [dt.datetime(2009, 1, 12), - dt.datetime(2009, 1, 15)])]) - def test_set_bounds_by_date_wrong_order(self, start, stop): - """Test error if bounds assignment has stop date before start. + @pytest.mark.parametrize("bound_val", [(None, None), None]) + def test_set_bounds_by_default_dates(self, bound_val): + """Verify bounds behavior with default date related inputs. Parameters ---------- - start : dt.datetime or list of dt.datetime - The start of the new bounds. - stop : dt.datetime or list of dt.datetime - The stop of the new bounds. + bound_val : tuple or NoneType + Values to set equal to the Instrument.bounds attribute """ - with pytest.raises(ValueError) as err: - self.testInst.bounds = (start, stop) - estr = 'Bounds must be set in increasing' - assert str(err).find(estr) >= 0 + self.testInst.bounds = bound_val + self.eval_iter_list(self.testInst.files.start_date, + self.testInst.files.stop_date, + self.testInst.files.files.index.freqstr) return - @pytest.mark.parametrize( - "start,stop", [(dt.datetime(2009, 1, 1, 1, 10), - dt.datetime(2009, 1, 15, 1, 10)), - ([dt.datetime(2009, 1, 1, 1, 10), - dt.datetime(2009, 2, 1, 1, 10)], - [dt.datetime(2009, 1, 15, 1, 10), - dt.datetime(2009, 2, 15, 1, 10)])]) - def test_set_bounds_by_date_extra_time(self, start, stop): - """Test set bounds by date with extra time. - - Note - ---- - Only the date portion is retained, hours and shorter timespans are - dropped. + @pytest.mark.parametrize("start_inds,stop_inds", + [([0], [2]), ([0, 3], [2, 4])]) + def test_set_bounds_by_date(self, start_inds, stop_inds): + """Test setting bounds with datetimes over simple range and season. Parameters ---------- - start : dt.datetime or list of dt.datetime - The start of the new bounds. - stop : dt.datetime or list of dt.datetime - The stop of the new bounds. + start_inds : list + The start indices of the new bounds. + stop_inds : list + The stop indices of the new bounds. 
""" + _, start_times = self.get_fnames_times(start_inds) + _, stop_times = self.get_fnames_times(stop_inds) - self.testInst.bounds = (start, stop) - start = filter_datetime_input(start) - stop = filter_datetime_input(stop) - self.eval_iter_list(start, stop) + self.testInst.bounds = (start_times, stop_times) + self.eval_iter_list(start_times, stop_times, + self.testInst.files.files.index.freqstr) return - @pytest.mark.parametrize("start,stop", [(dt.datetime(2010, 12, 1), - dt.datetime(2010, 12, 31)), - (dt.datetime(2009, 1, 1), - dt.datetime(2009, 1, 15)), - ([dt.datetime(2009, 1, 1), - dt.datetime(2009, 2, 1)], - [dt.datetime(2009, 1, 15), - dt.datetime(2009, 2, 15)]), - ([dt.datetime(2009, 1, 1, 1, 10), - dt.datetime(2009, 2, 1, 1, 10)], - [dt.datetime(2009, 1, 15, 1, 10), - dt.datetime(2009, 2, 15, 1, 10)]) - ]) - def test_iterate_over_bounds_set_by_date(self, start, stop): - """Test iterate over bounds via single date range. + @pytest.mark.parametrize("start_inds,stop_inds", [([1], [0]), + ([0, 2], [1, 1])]) + def test_set_bounds_by_date_wrong_order(self, start_inds, stop_inds): + """Test error if bounds assignment has stop date before start. Parameters ---------- - start : dt.datetime or list of dt.datetime - The start of the new bounds. - stop : dt.datetime or list of dt.datetime - The stop of the new bounds. + start_inds : list + The start indices of the new bounds. + stop_inds : list + The stop indices of the new bounds. """ + _, start_times = self.get_fnames_times(start_inds) + _, stop_times = self.get_fnames_times(stop_inds) - self.testInst.bounds = (start, stop) - # Filter time inputs. - start = filter_datetime_input(start) - stop = filter_datetime_input(stop) - self.eval_iter_list(start, stop, dates=True) - return - - def test_iterate_over_default_bounds(self): - """Test iterate over default bounds.""" + # Use pytest evaluation, as properties do not act like functions + with pytest.raises(ValueError) as verr: + self.testInst.bounds = (start_times, stop_times) - date_range = pds.date_range(self.ref_time, - self.ref_time + dt.timedelta(days=10)) - self.testInst.kwargs['list_files']['file_date_range'] = date_range - self.testInst.files.refresh() - self.testInst.bounds = (None, None) - self.eval_iter_list(date_range[0], date_range[-1], dates=True) + assert str(verr).find('Bounds must be set in increasing') >= 0 return - @pytest.mark.parametrize( - "starts,stops,step,width", - [([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 3), dt.datetime(2009, 1, 12)], 2, 2), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 6), dt.datetime(2009, 1, 15)], 3, 1), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 7), dt.datetime(2009, 1, 16)], 2, 4)]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_over_bounds_season_step_width(self, starts, stops, step, - width, by_date): - """Test iterate over season, step/width > 1, exclude stop bounds. + @pytest.mark.parametrize("start_inds,stop_inds", + [([0], [2]), ([0, 1], [1, 2])]) + @pytest.mark.parametrize("set_date", [True, False]) + def test_set_bounds_by_date_extra_time(self, start_inds, stop_inds, + set_date): + """Test set bounds by date with extra time. Parameters ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. 
- step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. - by_date : bool - If True, iterate by date. If False, iterate by filename. + start_inds : list + The start indices of the new bounds. + stop_inds : list + The stop indices of the new bounds. + set_date : bool + Set by date or not - """ - - out = self.support_iter_evaluations(starts, stops, step, width, - for_loop=True, by_date=by_date) - self.verify_iteration(out, reverse=False, inclusive=False) - - return + Note + ---- + Only the date portion is retained, hours and shorter timespans are + dropped. - @pytest.mark.parametrize( - "starts,stops,step,width", - [([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 4), dt.datetime(2009, 1, 13)], 2, 2), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 7), dt.datetime(2009, 1, 16)], 3, 1), - ([dt.datetime(2009, 1, 1), dt.datetime(2009, 1, 10)], - [dt.datetime(2009, 1, 6), dt.datetime(2009, 1, 15)], 2, 4)]) - @pytest.mark.parametrize("by_date", [True, False]) - def test_iterate_bounds_season_step_width_incl(self, starts, stops, step, - width, by_date): - """Test iterate over season, step/width > 1, includes stop bounds. + """ + # Set the bounds + _, start_times = self.get_fnames_times(start_inds) + start_times = [stime + dt.timedelta(seconds=3650) + for stime in start_times] + _, stop_times = self.get_fnames_times(stop_inds) + stop_times = [stime + dt.timedelta(seconds=3650) + for stime in stop_times] + self.testInst.bounds = (start_times, stop_times) + + # Evaluate the results + start = filter_datetime_input(start_times) + stop = filter_datetime_input(stop_times) + + self.eval_iter_list( + start, stop, self.testInst.files.files.index.freqstr, + dates=set_date) + return + + @pytest.mark.parametrize("start_inds,stop_inds", + [([[0], [1]]), ([[1], [2]]), ([0, 3], [1, 4])]) + def test_iterate_over_bounds_set_by_date(self, start_inds, stop_inds): + """Test iterate over bounds via single date range. Parameters ---------- - starts : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - stops : dt.datetime or list of dt.datetime - The start date for iterations, or dates for iteration over multiple - segments. - step : int - The step size for the iteration bounds. - width : int - The width of the iteration bounds. - by_date : bool - If True, iterate by date. If False, iterate by filename. + start_inds : list + The start indices of the new bounds. + stop_inds : list + The stop indices of the new bounds. """ + _, start_times = self.get_fnames_times(start_inds) + _, stop_times = self.get_fnames_times(stop_inds) - out = self.support_iter_evaluations(starts, stops, step, width, - for_loop=True, by_date=by_date) - self.verify_iteration(out, reverse=False, inclusive=True) + self.testInst.bounds = (start_times, stop_times) + + # Filter time inputs. 
+ start = filter_datetime_input(start_times) + stop = filter_datetime_input(stop_times) + self.eval_iter_list(start, stop, + self.testInst.files.files.index.freqstr, dates=True) return - def test_set_bounds_by_fname(self): - """Test set bounds by fname.""" + def test_iterate_over_default_bounds(self): + """Test iterate over default bounds.""" - start = '2009-01-01.nofile' - stop = '2009-01-03.nofile' - self.testInst.bounds = (start, stop) - assert np.all(self.testInst._iter_list - == ['2009-01-01.nofile', '2009-01-02.nofile', - '2009-01-03.nofile']) + # Establish a date range + date_range = pds.date_range( + self.ref_time, self.ref_time + pds.tseries.frequencies.to_offset( + '3{:s}'.format(self.testInst.files.files.index.freqstr)), + freq=self.testInst.files.files.index.freqstr) + + # Update the list of files + self.testInst.kwargs['list_files']['file_date_range'] = date_range + self.testInst.files.refresh() + self.testInst.bounds = (None, None) + + self.eval_iter_list(date_range[0], date_range[-1], date_range.freqstr, + dates=True) return def test_iterate_over_bounds_set_by_fname(self): """Test iterate over bounds set by fname.""" - start = '2009-01-01.nofile' - stop = '2009-01-15.nofile' - start_d = dt.datetime(2009, 1, 1) - stop_d = dt.datetime(2009, 1, 15) - self.testInst.bounds = (start, stop) - self.eval_iter_list(start_d, stop_d, dates=True) + fnames, ftimes = self.get_fnames_times(inds=[0, 2]) + self.testInst.bounds = tuple(fnames) + self.eval_iter_list(*ftimes, self.testInst.files.files.index.freqstr, + dates=True) + return - @pytest.mark.parametrize("start,stop", [('2009-01-13.nofile', - '2009-01-01.nofile'), - (['2009-01-01.nofile', - '2009-02-03.nofile'], - ['2009-01-03.nofile', - '2009-02-01.nofile'])]) - def test_set_bounds_by_fname_wrong_order(self, start, stop): + @pytest.mark.parametrize("start_inds,stop_inds", + [([2], [0]), ([0, 2], [1, 1])]) + def test_set_bounds_by_fname_wrong_order(self, start_inds, stop_inds): """Test for error if stop file before start file. Parameters ---------- - start : str or list of strs - The starting filename(s) for the new bounds. - stop : str or list of strs - The stop filename(s) for the new bounds. + start_inds : list + The index(es) corresponding to the start file(s) + stop_inds : list + The index(es) corresponding to the stop file(s) """ + start_names, _ = self.get_fnames_times(inds=start_inds) + stop_names, _ = self.get_fnames_times(inds=stop_inds) + + # If this is a length one list, convert to a string + if len(start_names) == 1: + start_names = start_names[0] + if len(stop_names) == 1: + stop_names = stop_names[0] + + # Evaluate the error raised and its message with pytest.raises(ValueError) as err: - self.testInst.bounds = (start, stop) + self.testInst.bounds = (start_names, stop_names) + estr = 'Bounds must be in increasing date' assert str(err).find(estr) >= 0 + return @pytest.mark.parametrize("operator", ['next', 'prev']) @@ -849,12 +769,9 @@ def test_iterate_over_bounds_set_by_fname_via_attr(self, operator): Name of iterator to use. 
""" + fnames, ftimes = self.get_fnames_times(inds=[0, 1]) - start = '2009-01-01.nofile' - stop = '2009-01-15.nofile' - start_d = dt.datetime(2009, 1, 1) - stop_d = dt.datetime(2009, 1, 15) - self.testInst.bounds = (start, stop) + self.testInst.bounds = tuple(fnames) dates = [] loop_next = True while loop_next: @@ -863,42 +780,44 @@ def test_iterate_over_bounds_set_by_fname_via_attr(self, operator): dates.append(self.testInst.date) except StopIteration: loop_next = False - out = pds.date_range(start_d, stop_d).tolist() - pysat.utils.testing.assert_lists_equal(dates, out) + out = pds.date_range(*ftimes, + freq=self.testInst.files.files.index.freq).tolist() + testing.assert_lists_equal(dates, out) return def test_set_bounds_by_fname_season(self): """Test set bounds by fname season.""" + fnames, _ = self.get_fnames_times(inds=[0, 4, 2, 5]) + start = [fnames[0], fnames[1]] + stop = [fnames[2], fnames[3]] - start = ['2009-01-01.nofile', '2009-02-01.nofile'] - stop = ['2009-01-03.nofile', '2009-02-03.nofile'] + check_list = self.testInst.files.files[0:6].tolist() + check_list.pop(3) self.testInst.bounds = (start, stop) - assert np.all(self.testInst._iter_list - == ['2009-01-01.nofile', '2009-01-02.nofile', - '2009-01-03.nofile', '2009-02-01.nofile', - '2009-02-02.nofile', '2009-02-03.nofile']) + testing.assert_lists_equal(self.testInst._iter_list, check_list) return def test_iterate_over_bounds_set_by_fname_season(self): """Test set bounds using multiple filenames.""" + fnames, ftimes = self.get_fnames_times(inds=[0, 4, 2, 5]) - start = ['2009-01-01.nofile', '2009-02-01.nofile'] - stop = ['2009-01-15.nofile', '2009-02-15.nofile'] - start_d = [dt.datetime(2009, 1, 1), dt.datetime(2009, 2, 1)] - stop_d = [dt.datetime(2009, 1, 15), dt.datetime(2009, 2, 15)] + start = [fnames[0], fnames[1]] + stop = [fnames[2], fnames[3]] + start_d = [ftimes[0], ftimes[1]] + stop_d = [ftimes[2], ftimes[3]] self.testInst.bounds = (start, stop) - self.eval_iter_list(start_d, stop_d, dates=True) + self.eval_iter_list(start_d, stop_d, + self.testInst.files.files.index.freqstr, dates=True) + return def test_set_bounds_fname_with_frequency(self): """Test set bounds using filenames and non-default step.""" - start = '2009-01-01.nofile' - start_date = dt.datetime(2009, 1, 1) - stop = '2009-01-03.nofile' - stop_date = dt.datetime(2009, 1, 3) - self.testInst.bounds = (start, stop, 2) - out = pds.date_range(start_date, stop_date, freq='2D').tolist() + fnames, ftimes = self.get_fnames_times(inds=[0, 2]) + self.testInst.bounds = (*fnames, 2) + freq = '2{:s}'.format(self.testInst.files.files.index.freqstr) + out = pds.date_range(*ftimes, freq=freq).tolist() # Convert filenames in list to a date for i, item in enumerate(self.testInst._iter_list): @@ -910,51 +829,60 @@ def test_set_bounds_fname_with_frequency(self): def test_iterate_bounds_fname_with_frequency(self): """Test iterate over bounds using filenames and non-default step.""" - start = '2009-01-01.nofile' - start_date = dt.datetime(2009, 1, 1) - stop = '2009-01-03.nofile' - stop_date = dt.datetime(2009, 1, 3) - self.testInst.bounds = (start, stop, 2) + fnames, ftimes = self.get_fnames_times(inds=[0, 2]) + freq = '2{:s}'.format(self.testInst.files.files.index.freqstr) + self.testInst.bounds = (*fnames, 2) - self.eval_iter_list(start_date, stop_date, dates=True, freq=2) + self.eval_iter_list(*ftimes, freq, dates=True) return def test_set_bounds_fname_with_frequency_and_width(self): """Test set fname bounds with step/width > 1.""" - start = '2009-01-01.nofile' - start_date = 
dt.datetime(2009, 1, 1) - stop = '2009-01-03.nofile' - stop_date = dt.datetime(2009, 1, 3) - self.testInst.bounds = (start, stop, 2, 2) - out = pds.date_range(start_date, stop_date - dt.timedelta(days=1), - freq='2D').tolist() + fnames, ftimes = self.get_fnames_times(inds=[0, 2]) + freq = '2{:s}'.format(self.testInst.files.files.index.freqstr) + self.testInst.bounds = (*fnames, 2, 2) + out = pds.date_range(filter_datetime_input(ftimes[0]), + filter_datetime_input(ftimes[1] + - dt.timedelta(days=1)), + freq=freq).tolist() + # Convert filenames in list to a date date_list = [] for item in self.testInst._iter_list: snip = item.split('.')[0] date_list.append(dt.datetime.strptime(snip, '%Y-%m-%d')) - assert np.all(date_list == out) + + # Evaluate the date components of the files and bounds + testing.assert_lists_equal(date_list, out) return def test_iteration_in_list_comprehension(self): """Test list comprehensions for length, uniqueness, iteration.""" + if self.testInst.files.files.index.shape[0] >= 10: + last_ind = 9 + else: + last_ind = self.testInst.files.files.index.shape[0] - 1 self.testInst.bounds = (self.testInst.files.files.index[0], - self.testInst.files.files.index[9]) + self.testInst.files.files.index[last_ind]) # Ensure no data to begin assert self.testInst.empty # Perform comprehension and ensure there are as many as there should be - insts = [inst for inst in self.testInst] - assert len(insts) == 10 + file_dates = [filter_datetime_input(ftime) + for ftime in self.testInst.files.files.index] + insts = [inst for inst in self.testInst if inst.date in file_dates] + assert len(insts) == last_ind + 1, \ + 'Found {:d} Instruments instead of {:d}'.format(len(insts), + last_ind + 1) # Get list of dates dates = pds.Series([inst.date for inst in insts]) assert dates.is_monotonic_increasing # Dates are unique - assert np.all(np.unique(dates) == dates.values) + testing.assert_lists_equal(np.unique(dates), dates.values) # Iteration instruments are not the same as original for inst in insts: diff --git a/pysat/tests/classes/cls_instrument_library.py b/pysat/tests/classes/cls_instrument_library.py index 1faa57461..bb047209e 100644 --- a/pysat/tests/classes/cls_instrument_library.py +++ b/pysat/tests/classes/cls_instrument_library.py @@ -32,6 +32,7 @@ class TestInstruments(InstLibTests): import datetime as dt from importlib import import_module +import sys import tempfile import warnings @@ -109,7 +110,12 @@ def setup_class(self): """Initialize the testing setup once before all tests are run.""" # Use a temporary directory so that the user's setup is not altered. - self.tempdir = tempfile.TemporaryDirectory() + # TODO(#974): Remove if/else when support for Python 3.9 is dropped. + if sys.version_info.minor >= 10: + self.tempdir = tempfile.TemporaryDirectory( + ignore_cleanup_errors=True) + else: + self.tempdir = tempfile.TemporaryDirectory() self.saved_path = pysat.params['data_dirs'] pysat.params._set_data_dirs(path=self.tempdir.name, store=False) return @@ -118,7 +124,15 @@ def teardown_class(self): """Clean up downloaded files and parameters from tests.""" pysat.params._set_data_dirs(self.saved_path, store=False) - self.tempdir.cleanup() + # Remove the temporary directory. In Windows, this occasionally fails + # by raising a wide variety of different error messages. Python 3.10+ + # can handle this, but lower Python versions cannot. + # TODO(#974): Remove try/except when support for Python 3.9 is dropped. 
+ try: + self.tempdir.cleanup() + except Exception: + pass + del self.saved_path, self.tempdir return diff --git a/pysat/tests/classes/cls_instrument_property.py b/pysat/tests/classes/cls_instrument_property.py index e53da816d..554cd7fef 100644 --- a/pysat/tests/classes/cls_instrument_property.py +++ b/pysat/tests/classes/cls_instrument_property.py @@ -7,6 +7,7 @@ """ import datetime as dt +import functools from importlib import reload import logging import numpy as np @@ -17,6 +18,7 @@ import xarray as xr import pysat +from pysat.instruments.methods import testing as ps_meth from pysat.utils import testing from pysat.utils.time import filter_datetime_input @@ -146,6 +148,21 @@ def test_remote_functions(self, remote_func, num): assert filter_datetime_input(self.out[-1]) == stop return + def test_remote_file_list_with_default_dates(self): + """Test setting the start / stop dates as default kwargs.""" + + # Set new defaults for kwargs + start = dt.datetime(2018, 1, 1) + stop = dt.datetime(2018, 2, 1) + + # Update remote_file_list default dates + self.testInst._list_remote_files_rtn = functools.partial( + ps_meth.list_remote_files, start=start, stop=stop) + + files = self.testInst.remote_file_list() + assert filter_datetime_input(files.index[0]) == start + assert filter_datetime_input(files.index[-1]) == stop + @pytest.mark.parametrize("no_remote_files", [True, False]) @pytest.mark.parametrize("download_keys", [ (["start"]), (["start", "stop"]), (["date_array"]), ([])]) @@ -619,6 +636,9 @@ def test_instrument_function_keyword_liveness(self, caplog, func, kwarg): def test_error_undefined_input_keywords(self): """Test for error if undefined keywords provided at instantiation.""" + if 'file_date_range' in self.testInst.kwargs['list_files']: + pytest.skip("Cannot run eval on pds.DatetimeIndex") + # Add a new keyword self.testInst.kwargs['load']['undefined_keyword1'] = True self.testInst.kwargs['load']['undefined_keyword2'] = False diff --git a/pysat/tests/test_constellation.py b/pysat/tests/test_constellation.py index d64d929c6..5dda00d34 100644 --- a/pysat/tests/test_constellation.py +++ b/pysat/tests/test_constellation.py @@ -7,6 +7,7 @@ import datetime as dt import logging +import numpy as np import pandas as pds import pytest @@ -260,12 +261,18 @@ def setup_method(self): "bounds", "empty", "empty_partial", "index_res", "common_index", "date", "yr", "doy", "yesterday", "today", "tomorrow", "variables"] + self.inst_attrs = ['platform', 'name', 'tag', 'inst_id', 'clean_level', + 'pandas_format', "empty", "yr", 'pad', 'date', + 'doy', 'acknowledgements', 'references'] + self.dims = ['time', 'x', 'y', 'z', 'profile_height', 'latitude', + 'longitude', 'altitude'] return def teardown_method(self): """Clean up the unit test environment after each method.""" - del self.inst, self.const, self.ref_time, self.attrs + del self.inst, self.const, self.ref_time, self.attrs, self.dims + del self.inst_attrs return def test_has_required_attrs(self): @@ -334,8 +341,9 @@ def test_empty_flag_data_empty(self): def test_empty_flag_data_empty_partial_load(self): """Test the status of the empty flag for partially loaded data.""" - # Load only one instrument and test the status flag - self.const.instruments[0].load(date=self.ref_time, use_header=True) + self.const = pysat.Constellation( + const_module=constellations.testing_partial, use_header=True) + self.const.load(date=self.ref_time) assert self.const.empty_partial assert not self.const.empty return @@ -343,8 +351,9 @@ def 
test_empty_flag_data_empty_partial_load(self): def test_empty_flag_data_not_empty_partial_load(self): """Test the alt status of the empty flag for partially loaded data.""" - # Load only one instrument and test the status flag for alternate flag - self.const.instruments[0].load(date=self.ref_time, use_header=True) + self.const = pysat.Constellation( + const_module=constellations.testing_partial, use_header=True) + self.const.load(date=self.ref_time) assert not self.const._empty(all_inst=False) return @@ -429,3 +438,142 @@ def test_bad_call_inst_method(self): testing.eval_bad_input(self.const._call_inst_method, AttributeError, "unknown method", ['not a method']) return + + @pytest.mark.parametrize('common_coord', [True, False]) + @pytest.mark.parametrize('fill_method', [None, 'nearest', 'linear']) + def test_to_inst_xarray(self, common_coord, fill_method): + """Test conversion of Constellation of mixed type to xarray Instrument. + + Parameters + ---------- + common_coord : bool + For Constellations with any xarray.Dataset Instruments, True to + include locations where all coordinate arrays cover, False to use + the maximum location range from the list of coordinates + fill_method : str or NoneType + Fill method if common data coordinates do not match exactly. If + one of 'nearest', 'pad'/'ffill', 'backfill'/'bfill', or None then + no interpolation will occur. If 'linear', 'zero', 'slinear', + 'quadratic', 'cubic', or 'polynomial' are used, then 1D or ND + interpolation will be used. + + """ + + self.const.load(date=self.ref_time) + out_inst = self.const.to_inst(common_coord, fill_method) + + # Test the output instrument attributes + assert not out_inst.pandas_format + assert out_inst.platform == 'pysat' + assert out_inst.tag == '' + assert out_inst.inst_id == '' + assert out_inst.clean_level == 'clean' + assert out_inst.pad is None + assert out_inst.date == self.ref_time + assert out_inst.doy == int(self.ref_time.strftime('%j')) + assert np.all([out_inst.name.find(iname) >= 0 + for iname in self.const.names]) + + # Test the output instrument data + testing.assert_lists_equal(self.dims, list(out_inst.data.dims.keys())) + testing.assert_list_contains(self.dims, + list(out_inst.data.coords.keys())) + testing.assert_list_contains(['variable_profile_height', 'image_lon', + 'image_lat'], + list(out_inst.data.coords.keys())) + + for cinst in self.const.instruments: + for var in cinst.variables: + var_name = '_'.join([var, cinst.platform, cinst.name]) + assert (var in out_inst.variables + or var_name in out_inst.variables), \ + "missing variable: {:s} or {:s}".format(var, var_name) + assert (var == 'time' or var in out_inst.meta + or var_name in out_inst.meta), \ + "missing variable in metadata: {:s} or {:s}".format( + var, var_name) + + # Test the output instrument index + testing.assert_lists_equal(list(out_inst.index), list(self.const.index)) + + return + + def test_to_inst_pandas_w_pad(self): + """Test Constellation `to_inst` with single, padded pandas Instrument. 
+ + """ + # Redefine the Instrument and constellation + self.inst = pysat.Instrument( + inst_module=pysat.instruments.pysat_testing, use_header=True, + pad=pds.DateOffset(hours=1), num_samples=10) + self.const = pysat.Constellation(instruments=[self.inst], + use_header=True) + + # Load the data + self.inst.load(date=self.ref_time) + self.const.load(date=self.ref_time) + + # Convert the Constellation into an Instrument equivalent to `self.inst` + out_inst = self.const.to_inst() + + # Test the output instrument attributes + assert out_inst.pandas_format + + for iattr in self.inst_attrs: + assert getattr(out_inst, iattr) == getattr(self.inst, iattr), \ + "Unexpected value for Instrument attribute {:}".format(iattr) + + # Test the output instrument data + testing.assert_lists_equal(self.inst.variables, out_inst.variables) + assert np.all(out_inst.data == self.inst.data) + + # Test the output instrument metadata + assert out_inst.meta == self.inst.meta + + # Test the output instrument index + testing.assert_lists_equal(list(out_inst.index), list(self.inst.index)) + + return + + def test_to_inst_mult_pad_clean(self): + """Test Constellation `to_inst` with multiple clean levels and pads.""" + # Redefine the Instrument and constellation + clean_level = 'dirty' + pad = pds.DateOffset(hours=1) + self.inst = [ + pysat.Instrument(inst_module=pysat.instruments.pysat_testing, + use_header=True, pad=pad, num_samples=10), + pysat.Instrument(inst_module=pysat.instruments.pysat_testing, + use_header=True, pad=2 * pad, + clean_level=clean_level, num_samples=10)] + self.const = pysat.Constellation(instruments=self.inst, use_header=True) + + # Load the Instrument and Constellation data + self.inst[-1].load(date=self.ref_time) + self.const.load(date=self.ref_time) + + # Convert the Constellation into an Instrument equivalent to `self.inst` + out_inst = self.const.to_inst() + + # Test the output instrument attributes + assert out_inst.pandas_format + + for iattr in self.inst_attrs: + assert getattr(out_inst, iattr) == getattr(self.inst[1], iattr), \ + "Unexpected value for Instrument attribute {:}".format(iattr) + + # Test the output instrument data and metadata + for var in self.inst[1].variables: + out_var = "_".join([var, self.inst[1].platform, self.inst[1].name]) + assert out_var in out_inst.variables, \ + "missing data variable: {:s}".format(out_var) + assert out_var in out_inst.meta, \ + "missing metadata variable: {:s}".format(out_var) + assert np.all(out_inst[out_var] == self.inst[1][var]), \ + "mismatched data for: {:s}".format(var) + + # Test the output instrument index + testing.assert_lists_equal(list(out_inst.index), + list(self.inst[1].index)) + + return diff --git a/pysat/tests/test_files.py b/pysat/tests/test_files.py index 164137c29..be2b38daa 100644 --- a/pysat/tests/test_files.py +++ b/pysat/tests/test_files.py @@ -596,12 +596,14 @@ def test_instrument_has_files(self): root_fname = ''.join(('pysat_testing_junk_{year:04d}_gold_{day:03d}_' 'stuff_{month:02d}_{hour:02d}_{minute:02d}_' '{second:02d}.pysat_testing_file')) - # create a bunch of files by year and doy + + # Create a group of files by year and doy start = dt.datetime(2007, 12, 31) stop = dt.datetime(2008, 1, 10) create_files(self.testInst, start, stop, freq='100min', use_doy=False, root_fname=root_fname) - # create the same range of dates + + # Create a DatetimeIndex with the same range of dates as the new files dates = pysat.utils.time.create_date_range(start, stop, freq='100min') pysat.instruments.pysat_testing.list_files = 
functools.partial( diff --git a/pysat/tests/test_instrument.py b/pysat/tests/test_instrument.py index 24bcf3a15..705d3eaa5 100644 --- a/pysat/tests/test_instrument.py +++ b/pysat/tests/test_instrument.py @@ -1,8 +1,10 @@ # -*- coding: utf-8 -*- """Tests the pysat Instrument object and methods.""" +import datetime as dt from importlib import reload import numpy as np +import pandas as pds import pytest import warnings import xarray as xr @@ -18,6 +20,7 @@ from pysat.tests.classes.cls_instrument_iteration import InstIterationTests from pysat.tests.classes.cls_instrument_property import InstPropertyTests from pysat.utils import testing +from pysat.utils.time import filter_datetime_input class TestBasics(InstAccessTests, InstIntegrationTests, InstIterationTests, @@ -64,6 +67,112 @@ def teardown_method(self): del self.testInst, self.out, self.ref_time, self.ref_doy return + def check_nonstandard_cadence(self): + """Check for nonstandard cadence in tests.""" + + if hasattr(self, 'freq'): + min_freq = pds.tseries.frequencies.to_offset('D') + return pds.tseries.frequencies.to_offset(self.freq) != min_freq + else: + # Uses standard frequency + return False + + +class TestInstCadence(TestBasics): + """Unit tests for pysat.Instrument objects with the default file cadence.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + self.freq = 'D' + + date_range = pds.date_range(self.ref_time - pds.DateOffset(years=1), + self.ref_time + pds.DateOffset(years=2) + - pds.DateOffset(days=1), freq=self.freq) + self.testInst = pysat.Instrument(platform='pysat', name='testing', + num_samples=10, + clean_level='clean', + update_files=True, + use_header=True, + file_date_range=date_range, + **self.testing_kwargs) + self.ref_doy = int(self.ref_time.strftime('%j')) + self.out = None + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.out, self.ref_time, self.ref_doy, self.freq + return + + +class TestInstMonthlyCadence(TestInstCadence): + """Unit tests for pysat.Instrument objects with a monthly file cadence.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + self.freq = 'MS' + + date_range = pds.date_range(self.ref_time - pds.DateOffset(years=1), + self.ref_time + + pds.DateOffset(years=2, days=-1), + freq=self.freq) + self.testInst = pysat.Instrument(platform='pysat', name='testing', + num_samples=10, + clean_level='clean', + update_files=True, + use_header=True, + file_date_range=date_range, + **self.testing_kwargs) + self.ref_doy = int(self.ref_time.strftime('%j')) + self.out = None + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.out, self.ref_time, self.ref_doy, self.freq + return + + +class TestInstYearlyCadence(TestInstCadence): + """Unit tests for pysat.Instrument objects with a yearly file cadence.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + self.freq = 'AS' + + # Since these are yearly files, use a longer date range + date_range = pds.date_range(self.ref_time - pds.DateOffset(years=1),
self.ref_time + + pds.DateOffset(years=5, days=-1), + freq=self.freq) + self.testInst = pysat.Instrument(platform='pysat', name='testing', + num_samples=10, + clean_level='clean', + update_files=True, + use_header=True, + file_date_range=date_range, + **self.testing_kwargs) + self.ref_doy = int(self.ref_time.strftime('%j')) + self.out = None + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.out, self.ref_time, self.ref_doy, self.freq + return + class TestBasicsInstModule(TestBasics): """Basic tests for instrument instantiated via inst_module.""" @@ -80,7 +189,7 @@ def setup_method(self): use_header=True, **self.testing_kwargs) self.ref_time = imod._test_dates[''][''] - self.ref_doy = 1 + self.ref_doy = int(self.ref_time.strftime('%j')) self.out = None return @@ -106,9 +215,9 @@ def setup_method(self): update_files=True, use_header=True, **self.testing_kwargs) - self.ref_time = \ - pysat.instruments.pysat_testing_xarray._test_dates[''][''] - self.ref_doy = 1 + self.ref_time = pysat.instruments.pysat_testing_xarray._test_dates[ + ''][''] + self.ref_doy = int(self.ref_time.strftime('%j')) self.out = None return @@ -134,7 +243,7 @@ def setup_method(self): use_header=True, **self.testing_kwargs) self.ref_time = pysat.instruments.pysat_testing2d._test_dates[''][''] - self.ref_doy = 1 + self.ref_doy = int(self.ref_time.strftime('%j')) self.out = None return @@ -165,9 +274,8 @@ def setup_method(self): update_files=True, use_header=True, **self.testing_kwargs) - self.ref_time = \ - pysat.instruments.pysat_ndtesting._test_dates[''][''] - self.ref_doy = 1 + self.ref_time = pysat.instruments.pysat_ndtesting._test_dates[''][''] + self.ref_doy = int(self.ref_time.strftime('%j')) self.out = None return @@ -177,6 +285,30 @@ def teardown_method(self): del self.testInst, self.out, self.ref_time, self.ref_doy return + def test_setting_data_as_tuple(self): + """Test setting data as a tuple.""" + + self.testInst.load(self.ref_time.year, self.ref_doy, use_header=True) + self.testInst['doubleMLT'] = ('time', 2. * self.testInst['mlt'].values) + assert np.all(self.testInst['doubleMLT'] == 2. * self.testInst['mlt']) + return + + def test_xarray_not_empty_notime(self): + """Test that xarray empty is False even if there is no time data.""" + # Load data and confirm it exists + self.testInst.load(date=self.ref_time) + assert not self.testInst.empty + + # Downselect to no time data + self.testInst.data = self.testInst[self.ref_time + dt.timedelta(days=1): + self.ref_time + dt.timedelta(days=2)] + assert not self.testInst.empty + assert len(self.testInst.index) == 0 + for dim in self.testInst.data.dims.keys(): + if dim != 'time': + assert len(self.testInst[dim]) > 0 + return + @pytest.mark.parametrize("index", [(0), ([0, 1, 2, 3]), (slice(0, 10)), @@ -277,6 +409,76 @@ def test_xarray_empty_conditions(self, data, target): self.testInst.data = data assert self.testInst.empty == target + return + + @pytest.mark.parametrize("val,warn_msg", + [([], "broadcast as NaN"), + (27., "Broadcast over epoch"), + (np.array([27.]), "Broadcast over epoch")]) + def test_set_xarray_single_value_warnings(self, val, warn_msg): + """Check for warning messages when setting xarray values. + + Parameters + ---------- + val : float or iterable + Value to be added as a new data variable. + warn_msg : str + Excerpt from expected warning message. 
+ + """ + + warnings.simplefilter("always") + + self.testInst.load(date=self.ref_time, use_header=True) + + with warnings.catch_warnings(record=True) as self.war: + self.testInst["new_val"] = val + testing.eval_warnings(self.war, warn_msg, warn_type=UserWarning) + + def test_set_xarray_single_value_errors(self): + """Check for an error when setting a coordinate with a mismatched shape. + + """ + + self.testInst.load(date=self.ref_time, use_header=True) + self.testInst.data = self.testInst.data.assign_coords( + {'preset_val': np.array([1.0, 2.0])}) + + with pytest.raises(ValueError) as verr: + self.testInst['preset_val'] = 1.0 + + estr = 'Shape of input does not match' + assert str(verr).find(estr) > 0 + return + + @pytest.mark.parametrize("new_val", [3.0, np.array([3.0])]) + def test_set_xarray_single_value_broadcast(self, new_val): + """Check that single values are correctly broadcast. + + Parameters + ---------- + new_val : float or iterable + Should be a single value, potentially an array with one element. + + """ + + self.testInst.load(date=self.ref_time, use_header=True) + self.testInst.data = self.testInst.data.assign_coords( + {'preset_val': 1.0}) + + self.testInst['preset_val'] = new_val + self.testInst['new_val'] = new_val + # Existing coords should not be broadcast + assert self.testInst['preset_val'].size == 1 + # New variables broadcast over time + assert len(self.testInst['new_val']) == len(self.testInst.index) class TestBasicsShiftedFileDates(TestBasics): @@ -295,7 +497,7 @@ def setup_method(self): use_header=True, **self.testing_kwargs) self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] - self.ref_doy = 1 + self.ref_doy = int(self.ref_time.strftime('%j')) self.out = None return @@ -411,6 +613,66 @@ def eval_warnings(self): testing.eval_warnings(self.war, self.warn_msgs) return + def test_instrument_labels(self): + """Test deprecation of `labels` kwarg in Instrument.""" + self.in_kwargs['labels'] = { + 'units': ('units', str), 'name': ('long_name', str), + 'notes': ('notes', str), 'desc': ('desc', str), + 'min_val': ('value_min', float), 'max_val': ('value_max', float), + 'fill_val': ('fill', float)} + + # Catch the warnings + with warnings.catch_warnings(record=True) as self.war: + tinst = pysat.Instrument(use_header=True, **self.in_kwargs) + + self.warn_msgs = np.array(["`labels` is deprecated, use `meta_kwargs`"]) + + # Evaluate the warning output + self.eval_warnings() + + # Evaluate the performance + assert float in tinst.meta.labels.label_type['fill_val'] + return + + @pytest.mark.parametrize('use_kwargs', [True, False]) + def test_instrument_meta_labels(self, use_kwargs): + """Test deprecation of `meta_labels` attribute in Instrument. + + Parameters + ---------- + use_kwargs : bool + If True, specify labels on input. If False, use defaults.
+ + """ + if use_kwargs: + self.in_kwargs['meta_kwargs'] = {'labels': { + 'units': ('units', str), 'name': ('long_name', str), + 'notes': ('notes', str), 'desc': ('desc', str), + 'min_val': ('value_min', float), + 'max_val': ('value_max', float), 'fill_val': ('fill', float)}} + + # Catch the warnings + with warnings.catch_warnings(record=True) as self.war: + tinst = pysat.Instrument(use_header=True, **self.in_kwargs) + labels = tinst.meta_labels + + self.warn_msgs = np.array(["Deprecated attribute, returns `meta_kwarg"]) + + # Evaluate the warning output + self.eval_warnings() + + # Evaluate the performance + if not use_kwargs: + self.in_kwargs['meta_kwargs'] = {'labels': { + 'units': ('units', str), 'name': ('long_name', str), + 'notes': ('notes', str), 'desc': ('desc', str), + 'min_val': ('value_min', (float, int)), + 'max_val': ('value_max', (float, int)), + 'fill_val': ('fill', (float, int, str))}} + + assert labels == self.in_kwargs['meta_kwargs']['labels'] + return + def test_generic_meta_translator(self): """Test deprecation of `generic_meta_translator`.""" diff --git a/pysat/tests/test_instrument_index.py b/pysat/tests/test_instrument_index.py index 9d633d459..ea41140a5 100644 --- a/pysat/tests/test_instrument_index.py +++ b/pysat/tests/test_instrument_index.py @@ -2,6 +2,8 @@ import datetime as dt from importlib import reload +import numpy as np +import warnings import pytest @@ -9,60 +11,131 @@ from pysat.utils import testing -class TestMalformedIndex(object): - """Unit tests for pandas `pysat.Instrument` with malformed index.""" +class TestIndex(object): + """Unit tests for pandas `pysat.Instrument` index checks.""" def setup_method(self): """Set up the unit test environment for each method.""" reload(pysat.instruments.pysat_testing) - self.testInst = pysat.Instrument(platform='pysat', name='testing', - num_samples=10, - clean_level='clean', - malformed_index=True, - update_files=True, - strict_time_flag=True, - use_header=True) - self.ref_time = dt.datetime(2009, 1, 1) - self.ref_doy = 1 + self.name = 'testing' + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] return def teardown_method(self): """Clean up the unit test environment after each method.""" - del self.testInst, self.ref_time, self.ref_doy + del self.ref_time, self.name return - def test_ensure_unique_index(self): - """Ensure that if Instrument index not-unique error is raised.""" + @pytest.mark.parametrize("kwargs,msg", + [({'non_monotonic_index': True}, + 'Loaded data is not monotonic'), + ({'non_unique_index': True}, + 'Loaded data is not unique')]) + def test_index_error_messages(self, kwargs, msg): + """Ensure that a bad Instrument index will raise correct error. + + Parameters + ---------- + kwargs : dict + Keywords and arguments to pass through for instrument instantiation. + Kwargs should trigger an error message when used on a test + instrument. + msg : str + Excerpt of expected error message. 
+ + """ + + test_inst = pysat.Instrument(platform='pysat', + name=self.name, + num_samples=10, + clean_level='clean', + update_files=True, + strict_time_flag=True, + use_header=True, + **kwargs) + year, doy = pysat.utils.time.getyrdoy(self.ref_time) + testing.eval_bad_input(test_inst.load, ValueError, msg, + input_args=[year, doy]) + return + + +class TestIndexXArray(TestIndex): + """Unit tests for xarray `pysat.Instrument` index checks.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" - testing.eval_bad_input(self.testInst.load, ValueError, - 'Loaded data is not unique.', - input_args=[self.ref_time.year, self.ref_doy]) + self.name = 'ndtesting' + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.ref_time, self.name return -class TestMalformedIndexXArray(TestMalformedIndex): - """Basic tests for xarray `pysat.Instrument` with shifted file dates.""" +class TestDeprecation(object): + """Unit test for deprecation warnings for index.""" def setup_method(self): """Set up the unit test environment for each method.""" - reload(pysat.instruments.pysat_testing_xarray) - self.testInst = pysat.Instrument(platform='pysat', - name='testing_xarray', - num_samples=10, - clean_level='clean', - malformed_index=True, - update_files=True, - strict_time_flag=True, - use_header=True) - self.ref_time = dt.datetime(2009, 1, 1) - self.ref_doy = 1 + warnings.simplefilter("always", DeprecationWarning) + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + self.warn_msgs = [] + self.war = "" return def teardown_method(self): """Clean up the unit test environment after each method.""" - del self.testInst, self.ref_time, self.ref_doy + del self.ref_time, self.warn_msgs, self.war + return + + def eval_warnings(self): + """Evaluate the number and message of the raised warnings.""" + + # Ensure the minimum number of warnings were raised. + assert len(self.war) >= len(self.warn_msgs) + + # Test the warning messages, ensuring each attribute is present. + testing.eval_warnings(self.war, self.warn_msgs) + return + + # TODO(#1094): Remove in pysat 3.2.0, potentially with class + @pytest.mark.parametrize('name', ['testing', 'ndtesting', 'testing_xarray', + 'testing2d']) + def test_kwarg_malformed_index(self, name): + """Test deprecation of `malformed_index` kwarg. + + Parameters + ---------- + name : str + name of instrument that uses the deprecated `malformed_index` kwarg. 
+ + """ + + test_inst = pysat.Instrument(platform='pysat', + name=name, + strict_time_flag=False, + use_header=True, + malformed_index=True) + + # Catch the warnings + with warnings.catch_warnings(record=True) as self.war: + test_inst.load(date=self.ref_time) + + self.warn_msgs = np.array([" ".join(["The kwarg malformed_index has", + "been deprecated"])]) + + # Evaluate the warning output + self.eval_warnings() + + # Check that resulting index is both non-monotonic and non-unique + assert not test_inst.index.is_monotonic_increasing + assert not test_inst.index.is_unique + return diff --git a/pysat/tests/test_instrument_padding.py b/pysat/tests/test_instrument_padding.py index 404c82167..89610277b 100644 --- a/pysat/tests/test_instrument_padding.py +++ b/pysat/tests/test_instrument_padding.py @@ -469,6 +469,31 @@ def test_data_padding_removal(self): return +class TestDataPaddingNonMonotonic(TestDataPadding): + """Unit tests for non-monotonic pandas `pysat.Instrument` with data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.testInst = pysat.Instrument(platform='pysat', name='testing', + clean_level='clean', + pad={'minutes': 5}, + non_monotonic_index=True, + update_files=True, + use_header=True) + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return + + class TestDataPaddingXArray(TestDataPadding): """Unit tests for xarray `pysat.Instrument` with data padding.""" @@ -494,6 +519,32 @@ def teardown_method(self): return +class TestDataPaddingXArrayNonMonotonic(TestDataPadding): + """Unit tests for non-monotonic xarray `pysat.Instrument` with data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing_xarray) + self.testInst = pysat.Instrument(platform='pysat', + name='testing_xarray', + clean_level='clean', + pad={'minutes': 5}, + non_monotonic_index=True, + update_files=True, + use_header=True) + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return + + class TestMultiFileRightDataPaddingBasics(TestDataPadding): """Unit tests for pandas `pysat.Instrument` with right offset data pad.""" @@ -520,6 +571,33 @@ def teardown_method(self): return +class TestMultiFileRightDataPaddingBasicsNonMonotonic(TestDataPadding): + """Tests for non-monotonic pandas `pysat.Instrument` with right data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.testInst = pysat.Instrument(platform='pysat', name='testing', + clean_level='clean', + update_files=True, + sim_multi_file_right=True, + non_monotonic_index=True, + pad={'minutes': 5}, + use_header=True) + self.testInst.multi_file_day = True + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return + + class
TestMultiFileRightDataPaddingBasicsXarray(TestDataPadding): """Unit tests for xarray `pysat.Instrument` with right offset data pad.""" @@ -547,6 +625,34 @@ def teardown_method(self): return +class TestMultiFileRightDataPaddingBasicsXarrayNonMonotonic(TestDataPadding): + """Tests for non-monotonic xarray `pysat.Instrument` with right data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing_xarray) + self.testInst = pysat.Instrument(platform='pysat', + name='testing_xarray', + clean_level='clean', + update_files=True, + sim_multi_file_right=True, + non_monotonic_index=True, + pad={'minutes': 5}, + use_header=True) + self.testInst.multi_file_day = True + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return + + class TestMultiFileLeftDataPaddingBasics(TestDataPadding): """Unit tests for pandas `pysat.Instrument` with left offset data pad.""" @@ -574,6 +680,34 @@ def teardown_method(self): return +class TestMultiFileLeftDataPaddingBasicsNonMonotonic(TestDataPadding): + """Tests for non-monotonic pandas `pysat.Instrument` with left data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing) + self.testInst = pysat.Instrument(platform='pysat', + name='testing', + clean_level='clean', + update_files=True, + sim_multi_file_left=True, + non_monotonic_index=True, + pad={'minutes': 5}, + use_header=True) + self.testInst.multi_file_day = True + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return + + class TestMultiFileLeftDataPaddingBasicsXarray(TestDataPadding): """Unit tests for xarray `pysat.Instrument` with left offset data pad.""" @@ -599,3 +733,31 @@ def teardown_method(self): del self.testInst, self.ref_time, self.ref_doy, self.delta return + + +class TestMultiFileLeftDataPaddingBasicsXarrayNonMonotonic(TestDataPadding): + """Tests for non-monotonic xarray `pysat.Instrument` with left data pad.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + reload(pysat.instruments.pysat_testing_xarray) + self.testInst = pysat.Instrument(platform='pysat', + name='testing_xarray', + clean_level='clean', + update_files=True, + sim_multi_file_left=True, + non_monotonic_index=True, + pad={'minutes': 5}, + use_header=True) + self.testInst.multi_file_day = True + self.ref_time = dt.datetime(2009, 1, 2) + self.ref_doy = 2 + self.delta = dt.timedelta(minutes=5) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.testInst, self.ref_time, self.ref_doy, self.delta + return diff --git a/pysat/tests/test_meta.py b/pysat/tests/test_meta.py index 0bd35e0ba..a4ec9aec8 100644 --- a/pysat/tests/test_meta.py +++ b/pysat/tests/test_meta.py @@ -33,9 +33,9 @@ def setup_method(self): 'name': ('Long_Name', str), 'desc': ('Desc', str), 'notes': ('Notes', str), - 'min_val': ('Minimum', np.float64), - 'max_val': ('Maximum', np.float64), - 'fill_val': ('Fill_Value', np.float64)} + 'min_val': ('Minimum', (float, int)), + 'max_val': 
('Maximum', (float, int)), + 'fill_val': ('Fill_Value', (float, int, str))} self.dval = None self.default_name = ['long_name'] self.default_nan = ['fill', 'value_min', 'value_max'] @@ -79,8 +79,15 @@ class `mutable` attribute. (default=False) self.meta.mutable = self.mutable return - def eval_meta_settings(self): - """Test the Meta settings for a specified value.""" + def eval_meta_settings(self, isfloat=True): + """Test the Meta settings for a specified value. + + Parameters + ---------- + isfloat : bool + True if data type is float, False if it is int, str, or other + + """ # Test the Meta data for the data value, self.dval for lkey in self.default_name: assert self.meta[self.dval, lkey] == self.dval, \ @@ -381,7 +388,7 @@ def test_set_meta_with_wrong_type_drop(self, bad_val): assert 'Metadata with type' in str(war[0].message) assert 'Dropping input' in str(war[0].message) - # Check that meta is blank + # Check that meta is set to the expected default assert np.isnan(self.meta['fake_var']['value_max']) return @@ -404,8 +411,9 @@ def test_set_meta_with_wrong_type_cast(self, bad_val, caplog): # Test the warning captured = caplog.text - assert captured.find('Metadata with type') >= 0 - assert captured.find('Recasting input') >= 0 + estr = "missing expected message in: {:}".format(captured) + assert captured.find('Metadata with type') >= 0, estr + assert captured.find('Recasting input') >= 0, estr # Check that meta is set if hasattr(bad_val, "__iter__"): @@ -731,8 +739,8 @@ def test_meta_assignment(self, custom_attr, assign_type): self.dval = 'test_meta_dict_assignment' self.default_val = { getattr(self.meta.labels, mattr): ' '.join(['test', mattr]) - if self.meta.labels.label_type[mattr] == str else -47 - for mattr in self.meta.labels.label_type.keys()} + if str in pysat.utils.listify(self.meta.labels.label_type[mattr]) + else -47 for mattr in self.meta.labels.label_type.keys()} self.default_name = [] self.default_nan = [] @@ -764,9 +772,9 @@ def test_multiple_meta_assignment(self, custom_attr, assign_type): dvals = ['mult1', 'mult2'] default_vals = { getattr(self.meta.labels, mattr): [ - ' '.join(['test', mattr, self.dval]) - if self.meta.labels.label_type[mattr] == str else -47 - for self.dval in dvals] + ' '.join(['test', mattr, self.dval]) if str + in pysat.utils.listify(self.meta.labels.label_type[mattr]) + else -47 for self.dval in dvals] for mattr in self.meta.labels.label_type.keys()} self.default_name = [] self.default_nan = [] @@ -1015,7 +1023,7 @@ def test_inst_data_assign_meta(self, labels, vals): self.default_val[slabel] = vals[i] set_dict[slabel] = vals[i] - # Initialize the Meta data + # Initialize the Meta data using the new data type self.testInst[self.dval] = set_dict self.meta = self.testInst.meta @@ -1974,9 +1982,9 @@ def setup_method(self): 'name': ('Long_Name', str), 'desc': ('Desc', str), 'notes': ('Notes', str), - 'min_val': ('Minimum', np.float64), - 'max_val': ('Maximum', np.float64), - 'fill_val': ('Fill_Value', np.float64)} + 'min_val': ('Minimum', (float, int)), + 'max_val': ('Maximum', (float, int)), + 'fill_val': ('Fill_Value', (float, int, str))} self.dval = None self.default_name = ['long_name'] self.default_nan = ['fill', 'value_min', 'value_max'] diff --git a/pysat/tests/test_utils.py b/pysat/tests/test_utils.py index eaf5f0ecc..a6869bf4c 100644 --- a/pysat/tests/test_utils.py +++ b/pysat/tests/test_utils.py @@ -21,6 +21,116 @@ from pysat import utils +class TestUpdateFill(object): + """Tests for the core utility `update_fill_values`.""" + + def 
setup_method(self): + """Set up the test environment.""" + self.ref_time = pysat.instruments.pysat_testing._test_dates[''][''] + self.new_fill_val = -47.0 + return + + def teardown_method(self): + """Clean up the test environment.""" + del self.ref_time, self.new_fill_val + return + + @pytest.mark.parametrize("name", ["ndtesting", "testing", "testmodel"]) + @pytest.mark.parametrize("variables", [('mlt'), (['mlt'])]) + def test_update_fill_values_numbers(self, name, variables): + """Test `update_fill_values` for the desired behaviour. + + Parameters + ---------- + name : str + Instrument name + variables : str or list-like + Variables to update (should be int or float type) + + """ + + # Initialize the instrument + inst = pysat.Instrument('pysat', name, use_header=True) + inst.load(date=self.ref_time) + + # Ensure there are fill values to check + test_vars = pysat.utils.listify(variables) + for var in test_vars: + inst[var].values[0] = inst.meta[var, inst.meta.labels.fill_val] + + # Update the fill values + pysat.utils.update_fill_values(inst, variables, self.new_fill_val) + + # Ensure the fill values are updated + for var in test_vars: + assert inst.meta[var, + inst.meta.labels.fill_val] == self.new_fill_val, \ + "meta fill value not updated for {:}".format(var) + assert np.all(inst[var].values[0] == self.new_fill_val), \ + "filled data values not updated for {:}".format(var) + return + + @pytest.mark.parametrize("name", ["ndtesting", "testing", "testmodel"]) + def test_update_fill_values_by_type(self, name): + """Test `update_fill_values` for the desired behaviour. + + Parameters + ---------- + name : str + Instrument name + + """ + + # Initialize the instrument + inst = pysat.Instrument('pysat', name, use_header=True) + inst.load(date=self.ref_time) + + # Ensure there are fill values to check + str_vars = [var for var in inst.variables if var in inst.meta.keys() + and isinstance(inst[var].values[0], str) + and inst.meta[var, inst.meta.labels.fill_val] is not None] + num_types = [int, float, np.float64, np.int64] + if inst.pandas_format: + num_vars = [var for var in inst.variables if var in inst.meta.keys() + and inst[var].dtype.type in num_types + and inst.meta[var, inst.meta.labels.fill_val] + is not None] + else: + num_vars = [var for var in inst.variables if var in inst.meta.keys() + and var not in inst.data.coords.keys() + and inst[var].dtype.type in num_types + and inst.meta[var, inst.meta.labels.fill_val] + is not None] + + for var in num_vars: + inst[var].values[0] = inst.meta[var, inst.meta.labels.fill_val] + + for var in str_vars: + inst[var].values[0] = str(inst.meta[var, inst.meta.labels.fill_val]) + + # Update and check the numeric fill values + pysat.utils.update_fill_values(inst, num_vars, self.new_fill_val) + + for var in num_vars: + assert inst.meta[var, + inst.meta.labels.fill_val] == self.new_fill_val, \ + "meta fill value not updated for {:}".format(var) + assert np.all(inst[var].values[0] == self.new_fill_val), \ + "filled data values not updated for {:}".format(var) + + # Update and check the string fill values + self.new_fill_val = 'fill' + pysat.utils.update_fill_values(inst, str_vars, self.new_fill_val) + + for var in str_vars: + assert inst.meta[var, + inst.meta.labels.fill_val] == self.new_fill_val, \ + "meta fill value not updated for {:}".format(var) + assert np.all(inst[var].values[0] == self.new_fill_val), \ + "filled data values not updated for {:}".format(var) + return + + class TestCIonly(object): """Tests where we mess with local settings.
@@ -633,6 +743,37 @@ def test_user_info_pass_through(self): assert ('user_info' not in inst.keys()) return + def test_list_kwargs_passthrough(self): + """Test that kwargs are passed through to lists correctly.""" + + # Iterate over unique instruments gathered + for inst in self.inst_list['download']: + # kwargs should not be passed to download + assert ('kwargs' not in inst.keys()) + + # Construct list of tests with optional load kwargs for this + # instrument. + list_kwargs = [] + for opt_inst in self.inst_list['load_options']: + if inst['inst_module'] == opt_inst['inst_module']: + if 'kwargs' in opt_inst.keys(): + list_kwargs.append(opt_inst['kwargs'].copy()) + + # Check if instrument has optional load kwargs + if hasattr(inst['inst_module'], '_test_load_opt'): + load_opt = getattr(inst['inst_module'], '_test_load_opt') + try: + load_opt = load_opt[inst['inst_id']][inst['tag']] + # Check that options specified in module match generated + # test list. + utils.testing.assert_lists_equal(list_kwargs, load_opt) + except KeyError: + # Optional load kwargs not defined for this tag / inst_id + # combination. + pass + + return + class TestDeprecation(object): """Unit test for deprecation warnings.""" diff --git a/pysat/tests/test_utils_coords.py b/pysat/tests/test_utils_coords.py index 5fac07cbd..bd08e9917 100644 --- a/pysat/tests/test_utils_coords.py +++ b/pysat/tests/test_utils_coords.py @@ -296,3 +296,257 @@ def test_single_lon_calc_solar_local_time(self): assert self.py_inst['slt'].min() >= 0.0 assert self.py_inst['slt'].shape == self.py_inst.index.shape return + + +class TestEstCommonCoord(object): + """Unit tests for the `establish_common_coord` function.""" + + def setup_method(self): + """Set up the unit test environment.""" + self.res = 1.0 + self.long_coord = np.arange(0, 360, self.res) + self.short_coord = np.arange(10, 350, 10.0 * self.res) + return + + def teardown_method(self): + """Clean up the unit test environment.""" + del self.long_coord, self.short_coord, self.res + return + + def test_establish_common_coord_overlap(self): + """Test `establish_common_coord` with common=True.""" + + out = coords.establish_common_coord([self.long_coord, self.short_coord]) + out_res = np.unique(out[1:] - out[:-1]) + + assert self.short_coord.min() == out.min(), "unexpected minimum value" + assert self.short_coord.max() == out.max(), "unexpected maximum value" + assert len(out_res) == 1, "inconsistent coordinate resolution" + assert out_res[0] == self.res, "unexpected coordinate resolution" + return + + def test_establish_common_coord_max_range(self): + """Test `establish_common_coord` with common=False.""" + + out = coords.establish_common_coord([self.short_coord, self.long_coord], + common=False) + out_res = np.unique(out[1:] - out[:-1]) + + assert self.long_coord.min() == out.min(), "unexpected minimum value" + assert self.long_coord.max() == out.max(), "unexpected maximum value" + assert len(out_res) == 1, "inconsistent coordinate resolution" + assert out_res[0] == self.res, "unexpected coordinate resolution" + return + + def test_establish_common_coord_single_val(self): + """Test `establish_common_coord` where one coordinate is a value.""" + + out = coords.establish_common_coord([self.short_coord[0], + self.long_coord], common=False) + out_res = np.unique(out[1:] - out[:-1]) + + assert self.long_coord.min() == out.min(), "unexpected minimum value" + assert self.long_coord.max() == out.max(), "unexpected maximum value" + assert len(out_res) == 1, "inconsistent coordinate resolution" + assert
out_res[0] == self.res, "unexpected coordinate resolution" + return + + def test_establish_common_coord_single_val_only(self): + """Test `establish_common_coord` where the coordinate is a value.""" + + out = coords.establish_common_coord([self.short_coord[0]]) + + assert self.short_coord[0] == out[0], "unexpected value" + assert len(out) == 1, "unexpected coordinate length" + return + + +class TestExpandXarrayDims(object): + """Unit tests for the `expand_xarray_dims` function.""" + + def setup_method(self): + """Set up the unit test environment.""" + self.test_inst = pysat.Instrument( + inst_module=pysat.instruments.pysat_ndtesting, use_header=True) + self.start_time = pysat.instruments.pysat_ndtesting._test_dates[''][''] + self.data_list = [] + self.out = None + self.meta = None + return + + def teardown_method(self): + """Clean up the unit test environment.""" + del self.test_inst, self.start_time, self.data_list, self.meta, self.out + return + + def set_data_meta(self, dims_equal): + """Set the input data list and meta data. + + Parameters + ---------- + dims_equal : bool + If True, the dimension variables for the data sets should be the + same; if False they should have different dimensions apart from + the 'time' dimension + + """ + + self.test_inst.load(date=self.start_time) + self.data_list.append(self.test_inst.data) + self.meta = self.test_inst.meta + + # The second data set should have half the time samples + num_samples = int(self.test_inst.index.shape[0] / 2) + + if dims_equal: + # Load a second data set with half the time samples + self.test_inst = pysat.Instrument( + inst_module=self.test_inst.inst_module, + num_samples=num_samples, use_header=True) + else: + # Load a second data set with different dimensions apart from time + self.test_inst = pysat.Instrument( + inst_module=pysat.instruments.pysat_testmodel, + num_samples=num_samples, use_header=True) + + self.test_inst.load(date=self.start_time + dt.timedelta(days=1)) + self.data_list.append(self.test_inst.data) + + return + + def eval_dims(self, dims_equal, exclude_dims=None, default_fill_val=None): + """Evaluate the dimensions of the expanded data sets. + + Parameters + ---------- + dims_equal : bool + If True, the dimension variables for the data sets should be the + same; if False they should have different dimensions apart from + the 'time' dimension + exclude_dims : list-like or NoneType + A list of dimensions that have the same name, but can have different + values or None if all the dimensions with the same name should + have the same shape.
(default=None) + default_fill_val : any + The expected fill value for data variables not present in self.meta + (default=None) + + """ + if exclude_dims is None: + exclude_dims = [] + + # Define the reference Dataset + ref_dims = list(self.out[0].dims.keys()) + + # Cycle through the remaining Datasets + for i, xdata in enumerate(self.out[1:]): + test_dims = list(xdata.dims.keys()) + + # Test that the expected dimension names overlap between datasets + if dims_equal: + testing.assert_lists_equal(test_dims, ref_dims) + else: + for tdim in test_dims: + assert (tdim == 'time' if tdim in ref_dims else tdim + != 'time'), "unexpected dimension: {:}".format(tdim) + + # Test the dimensions shapes for expected (lack of) differences + for tdim in test_dims: + if tdim in ref_dims: + if tdim in exclude_dims: + assert xdata[tdim].shape != self.out[0][tdim].shape + else: + assert xdata[tdim].shape == self.out[0][tdim].shape + + if xdata[tdim].shape != self.data_list[ + i + 1][tdim].shape: + # This data set is smaller, test for fill values + for dvar in xdata.data_vars.keys(): + if tdim in xdata[dvar].dims: + if dvar in self.meta: + fill_val = self.meta[ + dvar, self.meta.labels.fill_val] + else: + fill_val = default_fill_val + + try: + if np.isnan(fill_val): + assert np.isnan( + xdata[dvar].values).any() + else: + assert np.any(xdata[dvar].values + == fill_val) + except TypeError: + # This is a string or object + estr = "".join([ + "Bad or missing fill values for ", + dvar, ": ({:} not in {:})".format( + fill_val, xdata[dvar].values)]) + if fill_val is None: + assert fill_val in xdata[ + dvar].values, estr + else: + assert np.any(xdata[dvar].values + == fill_val), estr + + return + + @pytest.mark.parametrize('dims_equal', [True, False]) + @pytest.mark.parametrize('exclude_dims', [None, ['time']]) + def test_expand_xarray_dims(self, dims_equal, exclude_dims): + """Test successful padding of dimensions for xarray data. + + Parameters + ---------- + dims_equal : bool + If True, the dimension variables for the data sets should be the + same; if False they should have different dimensions apart from + the 'time' dimension + exclude_dims : list-like or NoneType + A list of dimensions that have the same name, but can have different + values or None if all the dimensions with the same name should + have the same shape. (default=None) + + """ + + # Set the input parameters + self.set_data_meta(dims_equal) + + # Run the dimension expansion + self.out = coords.expand_xarray_dims(self.data_list, self.meta, + dims_equal=dims_equal, + exclude_dims=exclude_dims) + + # Test the results + self.eval_dims(dims_equal, exclude_dims) + + return + + @pytest.mark.parametrize('new_data_type', [int, float, str, bool, None]) + def test_missing_meta(self, new_data_type): + """Test success if variable is missing from meta. 
+ + Parameters + ---------- + new_data_type : type + Data type for the new data that will be missing from `self.meta` + + """ + + # Set the input parameters + self.set_data_meta(True) + + # Add a data variable to one of the data sets + self.data_list[1]['new_variable'] = self.data_list[1]['mlt'].astype( + new_data_type) + + # Run the dimension expansion + self.out = coords.expand_xarray_dims(self.data_list, self.meta, + dims_equal=True) + + # Test the results + fill_val = self.meta.labels.default_values_from_type( + self.meta.labels.label_type['fill_val'], new_data_type) + self.eval_dims(True, default_fill_val=fill_val) + + return diff --git a/pysat/tests/test_utils_files.py b/pysat/tests/test_utils_files.py index 485425d68..87ba21683 100644 --- a/pysat/tests/test_utils_files.py +++ b/pysat/tests/test_utils_files.py @@ -374,18 +374,30 @@ def test_get_file_information(self): return - def test_check_and_make_path_exists(self): - """Test successful pass at creating existing directory.""" + @pytest.mark.parametrize("use_cwd", [True, False]) + def test_check_and_make_path_exists(self, use_cwd): + """Test successful pass at creating existing directory. - # Create a temporary directory - tempdir = tempfile.TemporaryDirectory() - assert os.path.isdir(tempdir.name) + Parameters + ---------- + use_cwd : bool + Use current working directory or a temporary directory + + """ + if use_cwd: + dir_name = "" + else: + # Create a temporary directory + tempdir = tempfile.TemporaryDirectory() + dir_name = tempdir.name + assert os.path.isdir(tempdir.name) # Assert check_and_make_path does not re-create the directory - assert not pysat.utils.files.check_and_make_path(tempdir.name) + assert not pysat.utils.files.check_and_make_path(dir_name) - # Clean up temporary directory - tempdir.cleanup() + if not use_cwd: + # Clean up temporary directory + tempdir.cleanup() return @pytest.mark.parametrize("trailer", [None, '', 'extra', diff --git a/pysat/tests/test_utils_io.py b/pysat/tests/test_utils_io.py index 7b53821c4..7ac9c1e11 100644 --- a/pysat/tests/test_utils_io.py +++ b/pysat/tests/test_utils_io.py @@ -190,6 +190,42 @@ def test_basic_write_and_read_netcdf_mixed_case_meta_format(self): return + @pytest.mark.parametrize("kwargs,target", + [({}, True), + ({'export_pysat_info': True}, True), + ({'export_pysat_info': False}, False)]) + def test_basic_write_and_read_netcdf_export_pysat_info(self, kwargs, + target): + """Test basic netCDF4 read/write with optional pysat info export. + + Parameters + ---------- + kwargs : dict + Specify value of `export_pysat_info`. An empty dict sets to + default value. + target : bool + True indicates that pysat info should be written to the file. + + """ + # Create a bunch of files by year and doy + outfile = os.path.join(self.tempdir.name, 'pysat_test_ncdf.nc') + self.testInst.load(date=self.stime, use_header=True) + + io.inst_to_netcdf(self.testInst, fname=outfile, preserve_meta_case=True, + epoch_name=default_epoch_name, **kwargs) + + tkwargs = decode_times_val(self.testInst.pandas_format) + + self.loaded_inst, meta = io.load_netcdf( + outfile, pandas_format=self.testInst.pandas_format, + epoch_name=default_epoch_name, **tkwargs) + + for key in ['platform', 'name', 'tag', 'inst_id', 'acknowledgements', + 'references']: + assert hasattr(meta.header, key) == target + + return + @pytest.mark.parametrize("add_path", [(''), ('unknown_dir')]) def test_inst_write_and_read_netcdf(self, add_path): """Test Instrument netCDF4 read/write, including non-existent paths. 
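For reference, a minimal usage sketch of the new `export_pysat_info` keyword exercised by the test above. It assumes pysat's bundled test instrument, a configured `pysat.params['data_dirs']`, and an illustrative output file name; it is not part of the patch itself.

```python
import pysat
from pysat.utils import io

# Sketch only: assumes the bundled 'pysat_testing' instrument is available
# and that pysat.params['data_dirs'] points at a writable location.
inst = pysat.Instrument('pysat', 'testing', use_header=True)
inst.load(2009, 1)

# Illustrative file name, written to the current working directory.
outfile = 'demo_no_pysat_info.nc'

# With export_pysat_info=False the platform, name, tag, inst_id,
# acknowledgements, and references attributes are not written to the file.
io.inst_to_netcdf(inst, fname=outfile, export_pysat_info=False)

data, meta = io.load_netcdf(outfile)
print(hasattr(meta.header, 'platform'))  # expected to be False here
```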
@@ -346,8 +382,9 @@ def test_read_netcdf4_bad_epoch_name(self, write_epoch, err_msg, err_type): @pytest.mark.parametrize("write_epoch,war_msg", [('epoch', 'is not a dimension.')]) + @pytest.mark.parametrize("strict_dim_check", [True, False]) def test_read_netcdf4_epoch_not_xarray_dimension(self, caplog, write_epoch, - war_msg): + war_msg, strict_dim_check): """Test netCDF4 load `epoch_name` not a dimension. Parameters @@ -356,6 +393,8 @@ def test_read_netcdf4_epoch_not_xarray_dimension(self, caplog, write_epoch, Label used for datetime data when writing file. war_msg : str Warning message to test for. + strict_dim_check : bool + If True, raises warning. If False, does not raise warning. """ @@ -374,10 +413,13 @@ def test_read_netcdf4_epoch_not_xarray_dimension(self, caplog, write_epoch, io.load_netcdf(outfile, epoch_name='slt', pandas_format=self.testInst.pandas_format, - **tkwargs) + strict_dim_check=strict_dim_check, **tkwargs) self.out = caplog.text - assert self.out.find(war_msg) + if strict_dim_check: + assert self.out.find(war_msg) >= 0 + else: + assert self.out.find(war_msg) < 0 return @pytest.mark.parametrize("wkwargs, lkwargs", [ @@ -897,7 +939,7 @@ def test_filter_netcdf4_metadata(self, remove, check_type, export_nan, if dvar.find('int8') >= 0: data_type = bool else: - data_type = type(self.testInst[dvar][0]) + data_type = type(self.testInst[dvar].values[0]) # Get the filtered output with warnings.catch_warnings(record=True) as war: @@ -949,12 +991,17 @@ def test_filter_netcdf4_metadata(self, remove, check_type, export_nan, assert mkey not in export_nan, \ "{:} should have been exported".format(repr(mkey)) else: - if mkey in export_nan and np.isnan(mdict[mkey]): + if(mkey in export_nan and not np.issubdtype(data_type, str) + and np.isnan(mdict[mkey])): assert np.isnan(fdict[mkey]) else: - assert fdict[mkey] == mdict[mkey], \ - "meta data {:} changed".format(repr(mkey)) - + if mkey in check_type and fdict[mkey] != mdict[mkey]: + assert fdict[mkey] == data_type(mdict[mkey]), \ + "unexpected recast meta data {:} value".format( + repr(mkey)) + else: + assert fdict[mkey] == mdict[mkey], \ + "meta data {:} changed".format(repr(mkey)) return @pytest.mark.parametrize('missing', [True, False]) @@ -1576,7 +1623,7 @@ def test_error_duplicated_trans_labels(self, meta_trans): # Apply translation testing.eval_bad_input(io.apply_table_translation_to_file, - ValueError, 'There is a duplicated', + ValueError, 'There are duplicated variable', input_args=(self.test_inst, self.meta_dict, meta_trans)) @@ -1608,9 +1655,13 @@ def test_from_file_table_translation_inconsistent(self, caplog): self.meta_dict) # Shift values of _FillValue but not FillVal + fkey = '_FillValue' for key in self.out.keys(): if '_FillValue' in self.out[key].keys(): - self.out[key]['_FillValue'] += 1 + if isinstance(self.out[key][fkey], str): + self.out[key][fkey] += 'shift' + else: + self.out[key]['_FillValue'] += 1 # Get default inverse translation from_trans = io.default_from_netcdf_translation_table( @@ -1839,3 +1890,68 @@ def teardown_method(self): del self.test_inst, self.test_date, self.out, self.meta_dict return + + +class TestIODeprecation(object): + """Unit tests for deprecation warnings in `utils.io`.""" + + def setup_method(self): + """Set up the test environment.""" + + # Create temporary directory + self.tempdir = tempfile.TemporaryDirectory() + self.saved_path = pysat.params['data_dirs'] + pysat.params['data_dirs'] = self.tempdir.name + + self.outfile = os.path.join(self.tempdir.name, 'pysat_test_ncdf.nc') + 
self.in_kwargs = {'labels': { + 'units': ('units', str), 'name': ('long_name', str), + 'notes': ('notes', str), 'desc': ('desc', str), + 'min_val': ('value_min', float), 'max_val': ('value_max', float), + 'fill_val': ('fill', float)}} + + return + + def teardown_method(self): + """Clean up the test environment.""" + + pysat.params['data_dirs'] = self.saved_path + + # Remove the temporary directory + self.tempdir.cleanup() + + # Clear the attributes + del self.tempdir, self.saved_path, self.outfile, self.in_kwargs + return + + @pytest.mark.parametrize("inst_name,load_func", [ + ("testing", io.load_netcdf_pandas), + ("ndtesting", io.load_netcdf_xarray)]) + def test_load_netcdf_labels(self, inst_name, load_func): + """Test deprecation of `labels` kwarg in different load functions. + + Parameters + ---------- + inst_name : str + Instrument name for test Instrument + load_func : function + NetCDF load method with deprecation warning + + """ + + # Create a test file + testInst = pysat.Instrument(platform='pysat', name=inst_name, + num_samples=100, update_files=True, + use_header=True) + testInst.load(date=testInst.inst_module._test_dates['']['']) + io.inst_to_netcdf(testInst, fname=self.outfile) + + # Catch the warnings + with warnings.catch_warnings(record=True) as war: + load_func(self.outfile, **self.in_kwargs) + + # Test the warnings + assert len(war) >= 1 + testing.eval_warnings(war, + ["`labels` is deprecated, use `meta_kwargs`"]) + return diff --git a/pysat/utils/__init__.py b/pysat/utils/__init__.py index 739d87036..9badbae4e 100644 --- a/pysat/utils/__init__.py +++ b/pysat/utils/__init__.py @@ -16,6 +16,7 @@ from pysat.utils._core import NetworkLock from pysat.utils._core import scale_units from pysat.utils._core import stringify +from pysat.utils._core import update_fill_values from pysat.utils import coords from pysat.utils import files from pysat.utils import io diff --git a/pysat/utils/_core.py b/pysat/utils/_core.py index a1fcf68f4..1241d34ef 100644 --- a/pysat/utils/_core.py +++ b/pysat/utils/_core.py @@ -152,7 +152,12 @@ def listify(iterable): """ # Cast as an array-like object - arr_iter = np.asarray(iterable) + try: + arr_iter = np.asarray(iterable) + except ValueError: + # This is necessary for Python 3.6 compatibility when using listify + # on slices + arr_iter = np.asarray([iterable]) # Treat output differently based on the array shape if arr_iter.shape == (): @@ -410,10 +415,11 @@ def generate_instrument_list(inst_loc, user_info=None): Dictionary with keys 'names', 'download', 'no_download' that contain lists with different information for each key: 'names' - list of platform_name combinations - 'download' - dict containing 'inst_module', 'tag', and 'inst_id' for - instruments with download routines - 'no_download' - dict containing 'inst_module', 'tag', and 'inst_id' for - instruments without download routines + 'download' - list of dicts containing 'inst_module', 'tag', and + 'inst_id' for instruments with download routines + 'load_options' - list of dicts containing load and download options + 'no_download' - list of dicts containing 'inst_module', 'tag', and + 'inst_id' for instruments without download routines Note ---- @@ -474,7 +480,7 @@ def generate_instrument_list(inst_loc, user_info=None): if not ci_skip: # Check if instrument is configured for download tests. 
if inst._test_download: - instrument_download.append(in_dict) + instrument_download.append(in_dict.copy()) if hasattr(module, '_test_load_opt'): # Add optional load tests try: @@ -482,7 +488,10 @@ def generate_instrument_list(inst_loc, user_info=None): kw_list = pysat.utils.listify(kw_list) for kwargs in kw_list: in_dict['kwargs'] = kwargs - instrument_optional_load.append(in_dict) + + # Append as copy so kwargs are unique. + instrument_optional_load.append( + in_dict.copy()) except KeyError: # Option does not exist for tag/inst_id # combo @@ -671,6 +680,55 @@ def display_available_instruments(inst_loc=None, show_inst_mod=None, return +def update_fill_values(inst, variables=None, new_fill_val=np.nan): + """Update Instrument data so that the fill value is consistent with Meta. + + Parameters + ---------- + inst : pysat.Instrument + Instrument object with data loaded + variables : str, list, or NoneType + List of variables to update or None to update all (default=None) + new_fill_val : any + New fill value to use (default=np.nan) + + Note + ---- + On Windows OS, this function may not work for data variables that are also + xarray coordinates. + + """ + + if not inst.empty: + # Get the variables (if needed) and ensure they are list-like + if variables is None: + variables = list(inst.variables) + else: + variables = listify(variables) + + for var in variables: + if var in inst.meta.keys(): + # Get the old fill value + old_fill_val = inst.meta[var, inst.meta.labels.fill_val] + + # Update the Meta data + inst.meta[var] = {inst.meta.labels.fill_val: new_fill_val} + + # Update the variable data + try: + if np.isnan(old_fill_val): + ifill = np.where(np.isnan(inst[var].values)) + else: + ifill = np.where(inst[var].values == old_fill_val) + except TypeError: + ifill = np.where(inst[var].values == old_fill_val) + + if len(ifill[0]) > 0: + inst[var].values[ifill] = new_fill_val + + return + + class NetworkLock(Lock): """Unit tests for NetworkLock manager.""" @@ -708,6 +766,7 @@ def __init__(self, *args, **kwargs): super(NetworkLock, self).__init__(timeout=timeout, *args, **kwargs) + return def release(self): """Release the Lock from the file system. @@ -728,3 +787,4 @@ def release(self): pass super(NetworkLock, self).release() + return diff --git a/pysat/utils/coords.py b/pysat/utils/coords.py index e9a162024..504dc3ff8 100644 --- a/pysat/utils/coords.py +++ b/pysat/utils/coords.py @@ -8,6 +8,7 @@ import datetime as dt import numpy as np import pandas as pds +import xarray as xr import pysat @@ -195,3 +196,173 @@ def calc_solar_local_time(inst, lon_name=None, slt_name='slt', inst.meta.labels.fill_val: fill_val} return + + +def establish_common_coord(coord_vals, common=True): + """Create a coordinate array that is appropriate for multiple data sets. + + Parameters + ---------- + coord_vals : list-like + A list of coordinate arrays of the same type: e.g., all geodetic + latitude in degrees + common : bool + True to include locations where all coordinate arrays cover, False to + use the maximum location range from the list of coordinates + (default=True) + + Returns + ------- + out_coord : array-like + An array appropriate for the list of coordinate values + + Note + ---- + Assumes that the supplied coordinates are distinct representations of + the same value in the same units and range (e.g., longitude in degrees + from 0-360). 
+ + """ + + start_val = None + end_val = None + res = None + + for coord_spec in coord_vals: + # Ensure the coordinate specification is array-like + coord_spec = np.asarray(coord_spec) + if coord_spec.shape == (): + coord_spec = np.asarray([coord_spec]) + + if start_val is None: + # Initialize the start and stop values + start_val = coord_spec[0] + end_val = coord_spec[-1] + + # Determine the resolution + if start_val == end_val: + res = np.inf + else: + res = (coord_spec[1:] - coord_spec[:-1]).mean() + else: + # Adjust the start and stop time as appropriate + if common: + if start_val < coord_spec[0]: + start_val = coord_spec[0] + if end_val > coord_spec[-1]: + end_val = coord_spec[-1] + else: + if start_val > coord_spec[0]: + start_val = coord_spec[0] + if end_val < coord_spec[-1]: + end_val = coord_spec[-1] + + # Update the resolution + new_res = (coord_spec[1:] - coord_spec[:-1]).mean() + if new_res < res: + res = new_res + + # Construct the common index + npnts = int((end_val - start_val) / res) + 1 + out_coord = np.linspace(start_val, end_val, npnts) + + return out_coord + + +def expand_xarray_dims(data_list, meta, dims_equal=False, exclude_dims=None): + """Ensure that dimensions do not vary when concatenating data. + + Parameters + ---------- + data_list : list-like + List of xr.Dataset objects with the same dimensions and variables + meta : pysat.Meta + Metadata for the data in `data_list` + dims_equal : bool + Assert that all xr.Dataset objects have the same dimensions if True, + the Datasets in `data_list` may have differing dimensions if False. + (default=False) + exclude_dims : list-like or NoneType + Dimensions to exclude from evaluation or None (default=None) + + Returns + ------- + out_list : list-like + List of xr.Dataset objects with the same dimensions and variables, + and with dimensions that all have the same values and data padded when + needed. 
+ + """ + # Get a list of the dimensions to exclude + if exclude_dims is None: + exclude_dims = [] + else: + exclude_dims = pysat.utils.listify(exclude_dims) + + # Get a list of all the dimensions + if dims_equal: + if len(data_list) > 0: + dims = [dim_key for dim_key in list(data_list[0].dims.keys()) + if dim_key not in exclude_dims] + else: + dims = [] + else: + dims = list() + for sdata in data_list: + if len(dims) == 0: + dims = [dim_key for dim_key in list(sdata.dims.keys()) + if dim_key not in exclude_dims] + else: + for dim in list(sdata.dims.keys()): + if dim not in dims and dim not in exclude_dims: + dims.append(dim) + + # After loading all the data, determine which dimensions may need to be + # expanded, as they could differ in dimensions from file to file + combo_dims = {dim: max([sdata.dims[dim] for sdata in data_list + if dim in sdata.dims]) for dim in dims} + + # Expand the data so that all dimensions are the same shape + out_list = list() + for i, sdata in enumerate(data_list): + # Determine which dimensions need to be updated + fix_dims = [dim for dim in sdata.dims.keys() if dim in combo_dims.keys() + and sdata.dims[dim] < combo_dims[dim]] + + new_data = {} + update_new = False + for dvar in sdata.data_vars.keys(): + # See if any dimensions need to be updated + update_dims = list(set(sdata[dvar].dims) & set(fix_dims)) + + # Save the old data as is, or pad it to have the right dims + if len(update_dims) > 0: + update_new = True + new_shape = list(sdata[dvar].values.shape) + old_slice = [slice(0, ns) for ns in new_shape] + + for dim in update_dims: + idim = list(sdata[dvar].dims).index(dim) + new_shape[idim] = combo_dims[dim] + + # Get the fill value + if dvar in meta: + # If available, take it from the metadata + fill_val = meta[dvar, meta.labels.fill_val] + else: + # Otherwise, use the data type + ftype = type(sdata[dvar].values.flatten()[0]) + fill_val = meta.labels.default_values_from_type( + meta.labels.label_type['fill_val'], ftype) + + # Set the new data for output + new_dat = np.full(shape=new_shape, fill_value=fill_val) + new_dat[tuple(old_slice)] = sdata[dvar].values + new_data[dvar] = (sdata[dvar].dims, new_dat) + else: + new_data[dvar] = sdata[dvar] + + # Get the updated dataset + out_list.append(xr.Dataset(new_data) if update_new else sdata) + + return out_list diff --git a/pysat/utils/files.py b/pysat/utils/files.py index 224f87f2c..ce0c18727 100644 --- a/pysat/utils/files.py +++ b/pysat/utils/files.py @@ -26,14 +26,14 @@ def process_parsed_filenames(stored, two_digit_year_break=None): Parameters ---------- stored : collections.orderedDict - Dict produced by `parse_fixed_width_filenames` or - `parse_delimited_filenames` + Ordered dictionary produced by `parse_fixed_width_filenames` or + `parse_delimited_filenames`, containing date, time, version, and + other information extracted from the filenames. two_digit_year_break : int or NoneType If filenames only store two digits for the year, then '1900' will be added for years >= two_digit_year_break and '2000' will be added for years < two_digit_year_break. - If None, then four-digit years are assumed. - (default=None) + If None, then four-digit years are assumed. (default=None) Returns ------- @@ -47,6 +47,11 @@ def process_parsed_filenames(stored, two_digit_year_break=None): only has one file per datetime. Version is required for this filtering, revision and cycle are optional. 
+ See Also + -------- + pysat.utils.files.parse_fixed_width_filenames, + pysat.utils.files.parse_delimited_filenames + """ search_dict = construct_searchstring_from_format(stored['format_str']) @@ -122,13 +127,13 @@ def process_parsed_filenames(stored, two_digit_year_break=None): def parse_fixed_width_filenames(files, format_str): - """Parse list of files, extracting data identified by `format_str`. + """Extract specified info from a list of files with a fixed name width. Parameters ---------- files : list List of files, typically provided by - `files.search_local_system_formatted_filename`. + `pysat.utils.files.search_local_system_formatted_filename`. format_str : str Provides the naming pattern of the instrument files and the locations of date information so an ordered list may be produced. @@ -153,6 +158,10 @@ def parse_fixed_width_filenames(files, format_str): where to parse out information. Thus, support for the wildcard '*' is limited to locations before the first template variable. + See Also + -------- + pysat.utils.files.search_local_system_formatted_filename + """ # Create storage for data to be parsed from filenames @@ -223,7 +232,7 @@ def parse_fixed_width_filenames(files, format_str): def parse_delimited_filenames(files, format_str, delimiter): - """Parse list of files, extracting data identified by format_str. + """Extract specified info from a list of files using a delimiter. Will parse file using `delimiter` though the function does not require every parsed item to be a variable, and more than one variable @@ -237,7 +246,7 @@ def parse_delimited_filenames(files, format_str, delimiter): ---------- files : list List of files, typically provided by - `files.search_local_system_formatted_filename`. + `pysat.utils.files.search_local_system_formatted_filename`. format_str : str Provides the naming pattern of the instrument files and the locations of date information so an ordered list may be produced. @@ -265,6 +274,10 @@ def parse_delimited_filenames(files, format_str, delimiter): delimiter in between, unless the '*' occurs after the variables. The '*' should not be used to replace the delimited character in the filename. + See Also + -------- + pysat.utils.files.search_local_system_formatted_filename + """ # Create storage for data to be parsed from filenames @@ -537,7 +550,6 @@ def update_data_directory_structure(new_template, test_run=True, from pysat import Instrument # Get a list of supported instruments - # Best solved with an upcoming method in pull #633 insts = available_instruments() if test_run: @@ -739,11 +751,12 @@ def check_and_make_path(path, expand_path=False): Parameters ---------- path : str - Directory path without any file names. Creates all - necessary directories to complete the path. + String specifying a directory path without any file names. All + directories needed to create the full path will be created. expand_path : bool If True, input `path` will be processed through `os.path.expanduser` - and `os.path.expandvars`. + (accounting for `~` and `~user` constructs, if $HOME and user are known) + and `os.path.expandvars` (accounting for environment variables) Returns ------- @@ -753,21 +766,34 @@ def check_and_make_path(path, expand_path=False): Raises ------ ValueError - If input path and internally constructed path are not equal, or - if an invalid path supplied. + If an invalid path is supplied. + RuntimeError + If the input path and internally constructed paths differ. 
+ + See Also + -------- + os.path.expanduser, os.path.expandvars """ if expand_path: - # Account for home references, multi-platform + # Account for home, user, and environment variables references path = os.path.expanduser(path) path = os.path.expandvars(path) - if not os.path.exists(path): - # Make path, checking to see that each level exists before attempting + if len(path) == 0: + # The user wants to write to the current directory + path = os.getcwd() + + # Set the output + made_dir = not os.path.exists(path) + + if made_dir: + # The directory(ies) need to be made, check to see whether or not each + # level exists before attempting to make each directory in the path root_path, local_dir = os.path.split(path) - # Check that we have a remotely valid path + # Check that we have a potentially valid path if len(root_path) == 0: raise ValueError('Invalid path specification.') @@ -792,7 +818,7 @@ def check_and_make_path(path, expand_path=False): while len(make_dir) > 0: local_dir = make_dir.pop() root_path = os.path.join(root_path, local_dir) - if (local_dir != '..') and (local_dir != '.'): + if local_dir != '..' and local_dir != '.': # Deal with case of path='... /path1/../final_path' or # path='... /path1/./final_path' os.mkdir(root_path) @@ -801,15 +827,13 @@ def check_and_make_path(path, expand_path=False): estr = ''.join(['Desired and constructed paths unexpectedly differ', '. Please post an issue at https://github.com/pysa', 't/pysat/issues']) - raise ValueError(estr) + raise RuntimeError(estr) - return True - else: - return False + return made_dir def get_file_information(paths, root_dir=''): - """Create a dict with values from `os.stat` attributes for input path(s). + """Retrieve system statistics for the input path(s). Parameters ---------- @@ -821,15 +845,17 @@ def get_file_information(paths, root_dir=''): Returns ------- file_info : dict - Keyed by file attribute. Each attribute maps to a list + Keyed by file attribute, which uses names that mirror or are expanded + upon those used by `os.stat`. Each attribute maps to a list of values for each file in `paths`. See Also -------- - os.stat : Get variety of file attributes + os.stat """ + # Ensure the input is a list paths = listify(paths) # Mapping of output key to the attribute name returned by `os.stat` diff --git a/pysat/utils/io.py b/pysat/utils/io.py index 8073b5faf..571b6a31d 100644 --- a/pysat/utils/io.py +++ b/pysat/utils/io.py @@ -51,9 +51,10 @@ def pysat_meta_to_xarray_attr(xr_data, pysat_meta, epoch_name): # Cycle through all the pysat MetaData labels and transfer for meta_key in pysat_meta[data_key].keys(): - # Assign attributes - xr_data[xarr_vars[i]].attrs[meta_key] = pysat_meta[ - data_key][meta_key] + # Assign attributes with values that are not None + if pysat_meta[data_key][meta_key] is not None: + xr_data[xarr_vars[i]].attrs[meta_key] = pysat_meta[ + data_key][meta_key] else: wstr = ''.join(['Did not find data for metadata variable ', @@ -64,9 +65,10 @@ def pysat_meta_to_xarray_attr(xr_data, pysat_meta, epoch_name): # MetaData labels and transfer. 
if epoch_name in pysat_meta.keys(): for meta_key in pysat_meta[epoch_name].keys(): - # Assign attributes - xr_data[epoch_name].attrs[meta_key] = pysat_meta[epoch_name][ - meta_key] + # Assign attributes that are not None + if pysat_meta[epoch_name][meta_key] is not None: + xr_data[epoch_name].attrs[meta_key] = pysat_meta[epoch_name][ + meta_key] return @@ -515,7 +517,7 @@ def apply_table_translation_to_file(inst, meta_dict, trans_table=None): inst : pysat.Instrument Instrument object with data to be written to file. meta_dict : dict - Output starting from `Instrument.meta.to_dict() `supplying attribute + Output starting from `Instrument.meta.to_dict()` supplying attribute data. trans_table : dict or NoneType Keyed by current metalabels containing a list of @@ -529,6 +531,11 @@ def apply_table_translation_to_file(inst, meta_dict, trans_table=None): export_dict : dict A dictionary of the metadata for each variable of an output file. + Raises + ------ + ValueError + If there is a duplicated variable label in the translation table + """ export_dict = {} @@ -537,13 +544,13 @@ def apply_table_translation_to_file(inst, meta_dict, trans_table=None): trans_table = default_to_netcdf_translation_table(inst) # Confirm there are no duplicated translation labels - trans_labels = [trans_table[key] for key in trans_table.keys()] - for i in np.arange(len(trans_labels)): - item = trans_labels.pop(0) - if item in trans_labels: - estr = ''.join(['There is a duplicated variable label value in ', - '`trans_table`: ', item]) - raise ValueError(estr) + trans_labels = list() + for key in trans_table.keys(): + trans_labels.extend(trans_table[key]) + + if np.unique(trans_labels).shape[0] != len(trans_labels): + raise ValueError(''.join(['There are duplicated variable label values', + ' in `trans_table`'])) # Translate each metadata label if a translation is provided for key in meta_dict.keys(): @@ -690,15 +697,10 @@ def meta_array_expander(meta_dict): def load_netcdf(fnames, strict_meta=False, file_format='NETCDF4', epoch_name=None, epoch_unit='ms', epoch_origin='unix', pandas_format=True, decode_timedelta=False, - labels={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'plot': ('plot_label', str), 'axis': ('axis', str), - 'scale': ('scale', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), - 'fill_val': ('fill', np.float64)}, + combine_by_coords=True, meta_kwargs=None, labels=None, meta_processor=None, meta_translation=None, - drop_meta_labels=None, decode_times=None): + drop_meta_labels=None, decode_times=None, + strict_dim_check=True): """Load netCDF-3/4 file produced by pysat. Parameters @@ -739,13 +741,17 @@ def load_netcdf(fnames, strict_meta=False, file_format='NETCDF4', Used for xarray data (`pandas_format` is False). If True, variables with unit attributes that are 'timelike' ('hours', 'minutes', etc) are converted to `np.timedelta64`. (default=False) - labels : dict + combine_by_coords : bool + Used for xarray data (`pandas_format` is False) when loading a + multi-file dataset. If True, uses `xarray.combine_by_coords`. If False, + uses `xarray.combine_nested`. (default=True) + meta_kwargs : dict or NoneType + Dict to specify custom Meta initialization or None to use Meta + defaults (default=None) + labels : dict or NoneType Dict where keys are the label attribute names and the values are tuples - that have the label values and value types in that order. 
- (default={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), 'fill_val': ('fill', np.float64)}) + that have the label values and value types in that order. None to use + meta defaults. Deprecated, use `meta_kwargs` instead. (default=None) meta_processor : function or NoneType If not None, a dict containing all of the loaded metadata will be passed to `meta_processor` which should return a filtered version @@ -768,6 +774,10 @@ def load_netcdf(fnames, strict_meta=False, file_format='NETCDF4', then `epoch_name` will be converted to datetime using `epoch_unit` and `epoch_origin`. If None, will be set to False for backwards compatibility. For xarray only. (default=None) + strict_dim_check : bool + Used for xarray data (`pandas_format` is False). If True, warn the user + that the desired epoch is not present in `xarray.dims`. If False, + no warning is raised. (default=True) Returns ------- @@ -800,7 +810,7 @@ def load_netcdf(fnames, strict_meta=False, file_format='NETCDF4', epoch_name=epoch_name, epoch_unit=epoch_unit, epoch_origin=epoch_origin, - labels=labels, + meta_kwargs=meta_kwargs, labels=labels, meta_processor=meta_processor, meta_translation=meta_translation, drop_meta_labels=drop_meta_labels) @@ -811,27 +821,21 @@ def load_netcdf(fnames, strict_meta=False, file_format='NETCDF4', epoch_unit=epoch_unit, epoch_origin=epoch_origin, decode_timedelta=decode_timedelta, - labels=labels, + combine_by_coords=combine_by_coords, + meta_kwargs=meta_kwargs, labels=labels, meta_processor=meta_processor, meta_translation=meta_translation, drop_meta_labels=drop_meta_labels, - decode_times=decode_times) + decode_times=decode_times, + strict_dim_check=strict_dim_check) return data, meta def load_netcdf_pandas(fnames, strict_meta=False, file_format='NETCDF4', epoch_name='Epoch', epoch_unit='ms', epoch_origin='unix', - labels={'units': ('units', str), - 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'plot': ('plot_label', str), - 'axis': ('axis', str), 'scale': ('scale', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), - 'fill_val': ('fill', np.float64)}, - meta_processor=None, meta_translation=None, - drop_meta_labels=None): + meta_kwargs=None, labels=None, meta_processor=None, + meta_translation=None, drop_meta_labels=None): """Load netCDF-3/4 file produced by pysat in a pandas format. Parameters @@ -863,13 +867,13 @@ def load_netcdf_pandas(fnames, strict_meta=False, file_format='NETCDF4', If ‘julian’, `epoch_unit` must be ‘D’, and origin is set to beginning of Julian Calendar. Julian day number 0 is assigned to the day starting at noon on January 1, 4713 BC. (default='unix') - labels : dict + meta_kwargs : dict or NoneType + Dict to specify custom Meta initialization or None to use Meta + defaults (default=None) + labels : dict or NoneType Dict where keys are the label attribute names and the values are tuples - that have the label values and value types in that order. - (default={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), 'fill_val': ('fill', np.float64)}) + that have the label values and value types in that order or None to use + Meta defaults. Deprecated, use `meta_kwargs` instead. 
(default=None) meta_processor : function or NoneType If not None, a dict containing all of the loaded metadata will be passed to `meta_processor` which should return a filtered version @@ -926,7 +930,18 @@ def load_netcdf_pandas(fnames, strict_meta=False, file_format='NETCDF4', running_store = [] two_d_keys = [] two_d_dims = [] - meta = pysat.Meta(labels=labels) + + if meta_kwargs is None: + meta_kwargs = {} + + if labels is not None: + warnings.warn("".join(["`labels` is deprecated, use `meta_kwargs`", + "with the 'labels' key instead. Support ", + "for `labels` will be removed in v3.2.0+"]), + DeprecationWarning, stacklevel=2) + meta_kwargs['labels'] = labels + + meta = pysat.Meta(**meta_kwargs) # Store all metadata in a dict that may be filtered before # assignment to `meta`. @@ -1173,7 +1188,7 @@ def load_netcdf_pandas(fnames, strict_meta=False, file_format='NETCDF4', for key in filt_mdict: if 'meta' in filt_mdict[key].keys(): # Higher order metadata - dim_meta = pysat.Meta(labels=labels) + dim_meta = pysat.Meta(**meta_kwargs) for skey in filt_mdict[key]['meta'].keys(): dim_meta[skey] = filt_mdict[key]['meta'][skey] @@ -1194,18 +1209,10 @@ def load_netcdf_pandas(fnames, strict_meta=False, file_format='NETCDF4', def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', epoch_name='time', epoch_unit='ms', epoch_origin='unix', - decode_timedelta=False, - labels={'units': ('units', str), - 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'plot': ('plot_label', str), - 'axis': ('axis', str), - 'scale': ('scale', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), - 'fill_val': ('fill', np.float64)}, - meta_processor=None, meta_translation=None, - drop_meta_labels=None, decode_times=False): + decode_timedelta=False, combine_by_coords=True, + meta_kwargs=None, labels=None, meta_processor=None, + meta_translation=None, drop_meta_labels=None, + decode_times=False, strict_dim_check=True): """Load netCDF-3/4 file produced by pysat into an xarray Dataset. Parameters @@ -1240,13 +1247,17 @@ def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', decode_timedelta : bool If True, variables with unit attributes that are 'timelike' ('hours', 'minutes', etc) are converted to `np.timedelta64`. (default=False) - labels : dict + combine_by_coords : bool + Used for xarray data (`pandas_format` is False) when loading a + multi-file dataset. If True, uses `xarray.combine_by_coords`. If False, + uses `xarray.combine_nested`. (default=True) + meta_kwargs : dict or NoneType + Dict to specify custom Meta initialization or None to use Meta + defaults (default=None) + labels : dict or NoneType Dict where keys are the label attribute names and the values are tuples - that have the label values and value types in that order. - (default={'units': ('units', str), 'name': ('long_name', str), - 'notes': ('notes', str), 'desc': ('desc', str), - 'min_val': ('value_min', np.float64), - 'max_val': ('value_max', np.float64), 'fill_val': ('fill', np.float64)}) + that have the label values and value types in that order or None to use + Meta defaults. Deprecated, use `meta_kwargs` instead. 
(default=None) meta_processor : function or NoneType If not None, a dict containing all of the loaded metadata will be passed to `meta_processor` which should return a filtered version @@ -1269,6 +1280,10 @@ def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', then `epoch_name` will be converted to datetime using `epoch_unit` and `epoch_origin`. If None, will be set to False for backwards compatibility. (default=None) + strict_dim_check : bool + Used for xarray data (`pandas_format` is False). If True, warn the user + that the desired epoch is not present in `xarray.dims`. If False, + no warning is raised. (default=True) Returns ------- @@ -1298,7 +1313,17 @@ def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', file_format = file_format.upper() # Initialize local variables - meta = pysat.Meta(labels=labels) + if meta_kwargs is None: + meta_kwargs = {} + + if labels is not None: + warnings.warn("".join(["`labels` is deprecated, use `meta_kwargs`", + "with the 'labels' key instead. Support ", + "for `labels` will be removed in v3.2.0+"]), + DeprecationWarning, stacklevel=2) + meta_kwargs['labels'] = labels + + meta = pysat.Meta(**meta_kwargs) # Store all metadata in a dict that may be filtered before # assignment to `meta`. @@ -1314,13 +1339,18 @@ def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', else: drop_meta_labels = pysat.utils.listify(drop_meta_labels) + if combine_by_coords: + combine_kw = {'combine': 'by_coords'} + else: + combine_kw = {'combine': 'nested', 'concat_dim': epoch_name} + # Load the data differently for single or multiple files if len(fnames) == 1: data = xr.open_dataset(fnames[0], decode_timedelta=decode_timedelta, decode_times=decode_times) else: data = xr.open_mfdataset(fnames, decode_timedelta=decode_timedelta, - combine='by_coords', decode_times=decode_times) + decode_times=decode_times, **combine_kw) # Need to get a list of all variables, dimensions, and coordinates. all_vars = xarray_all_vars(data) @@ -1332,9 +1362,10 @@ def load_netcdf_xarray(fnames, strict_meta=False, file_format='NETCDF4', data = data.rename({epoch_name: 'time'}) elif epoch_name in all_vars: data = data.rename({epoch_name: 'time'}) - wstr = ''.join(['Epoch label: "', epoch_name, '"', - ' is not a dimension.']) - pysat.logger.warning(wstr) + if strict_dim_check: + wstr = ''.join(['Epoch label: "', epoch_name, '"', + ' is not a dimension.']) + pysat.logger.warning(wstr) else: estr = ''.join(['Epoch label: "', epoch_name, '"', ' was not found in loaded dimensions [', @@ -1437,7 +1468,7 @@ def return_epoch_metadata(inst, epoch_name): basic_labels = [inst.meta.labels.units] for label in basic_labels: - if label not in new_dict or len(new_dict[label]) == 0: + if label not in new_dict or new_dict[label] == '': new_dict[label] = epoch_label # Assign name @@ -1530,8 +1561,8 @@ def xarray_all_vars(data): def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, mode='w', zlib=False, complevel=4, shuffle=True, preserve_meta_case=False, check_type=None, export_nan=None, - unlimited_time=True, meta_translation=None, - meta_processor=None): + export_pysat_info=True, unlimited_time=True, + meta_translation=None, meta_processor=None): """Store pysat data in a netCDF4 file. Parameters @@ -1580,6 +1611,9 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, included will be written to the file. If not listed and a value is NaN then that attribute simply won't be included in the netCDF4 file. 
(default=None) + export_pysat_info : bool + Appends the platform, name, tag, and inst_id to the metadata + if True. Otherwise these attributes are lost. (default=True) unlimited_time : bool Flag specifying whether or not the epoch/time dimension should be unlimited; it is when the flag is True. (default=True) @@ -1688,10 +1722,12 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, if 'Text_Supplement' not in attrb_dict: attrb_dict['Text_Supplement'] = '' + # TODO(#1122): Evaluate whether pop is necessary for all these. # Remove any attributes with the names below. pysat is responsible # for including them in the file. pysat_items = ['Date_End', 'Date_Start', 'File', 'File_Date', - 'Generation_Date', 'Logical_File_ID'] + 'Generation_Date', 'Logical_File_ID', 'acknowledgements', + 'references'] for pitem in pysat_items: if pitem in attrb_dict: pysat.logger.debug('Removing {} attribute and replacing.'.format( @@ -1699,12 +1735,15 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, attrb_dict.pop(pitem) # Set the general file information - attrb_dict['platform'] = inst.platform - attrb_dict['name'] = inst.name - attrb_dict['tag'] = inst.tag - attrb_dict['inst_id'] = inst.inst_id - attrb_dict['acknowledgements'] = inst.acknowledgements - attrb_dict['references'] = inst.references + if export_pysat_info: + # For operational instruments, these should be set separately. + attrb_dict['platform'] = inst.platform + attrb_dict['name'] = inst.name + attrb_dict['tag'] = inst.tag + attrb_dict['inst_id'] = inst.inst_id + attrb_dict['acknowledgements'] = inst.acknowledgements + attrb_dict['references'] = inst.references + attrb_dict['Date_End'] = dt.datetime.strftime( inst.index[-1], '%a, %d %b %Y, %Y-%m-%dT%H:%M:%S.%f') attrb_dict['Date_End'] = attrb_dict['Date_End'][:-3] + ' UTC' @@ -1756,12 +1795,9 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, if key not in meta_translation: meta_translation[key] = def_meta_trans[key] - # Get current metadata in dictionary form + # Get current metadata in dictionary form and add epoch metadata export_meta = inst.meta.to_dict() - - # Add in epoch metadata, not normally stored in meta. - epoch_meta = return_epoch_metadata(inst, epoch_name) - export_meta[epoch_name] = epoch_meta + export_meta[epoch_name] = return_epoch_metadata(inst, epoch_name) # Ensure the metadata is set and updated to netCDF4 standards export_meta = add_netcdf4_standards_to_metadict(inst, export_meta, @@ -1861,10 +1897,17 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, # Use info in coltype to get real datatype of object if coltype == str: + if '_FillValue' in export_meta[lower_key].keys(): + str_fill = export_meta[lower_key]['_FillValue'] + del export_meta[lower_key]['_FillValue'] + else: + str_fill = '' + cdfkey = out_data.createVariable(case_key, coltype, dimensions=epoch_name, complevel=complevel, - shuffle=shuffle) + shuffle=shuffle, + fill_value=str_fill) # Set metadata cdfkey.setncatts(export_meta[lower_key]) @@ -1992,9 +2035,8 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, # for all of that fancy data. 
# Get index information - idx = good_data_loc data, coltype, datetime_flag = inst._get_data_info( - inst[key].iloc[idx].index) + inst[key].iloc[good_data_loc].index) # Create dimension variable to store index in netCDF4 cdfkey = out_data.createVariable(case_key, coltype, @@ -2034,7 +2076,8 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, * 1.0E-6).astype(np.int64) # Update 'time' dimension to `epoch_name` - xr_data = xr_data.rename({'time': epoch_name}) + if epoch_name != 'time': + xr_data = xr_data.rename({'time': epoch_name}) # Transfer metadata pysat_meta_to_xarray_attr(xr_data, export_meta, epoch_name) @@ -2060,6 +2103,12 @@ def inst_to_netcdf(inst, fname, base_instrument=None, epoch_name=None, # Account for possible type for unicode strings if vtype == np.dtype('= 0, \ diff --git a/pysat/version.txt b/pysat/version.txt index 818bd47ab..fd2a01863 100644 --- a/pysat/version.txt +++ b/pysat/version.txt @@ -1 +1 @@ -3.0.6 +3.1.0 diff --git a/test_requirements.txt b/test_requirements.txt index 8ce80ec1a..823ea6406 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,12 +1,12 @@ coveralls<3.3 flake8 flake8-docstrings -hacking>=1.0 +hacking>=1.0,<6.0 ipython m2r2 numpydoc pysatSpaceWeather pytest-cov pytest-ordering -sphinx +sphinx<7.0 sphinx_rtd_theme
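Usage sketch for the new `pysat.utils.update_fill_values` helper added in `_core.py`. The instrument, variable name, and fill value below are illustrative and assume the bundled test instrument and a configured `pysat.params['data_dirs']`.

```python
import pysat

# Sketch only: load the bundled test instrument for an arbitrary test date.
inst = pysat.Instrument('pysat', 'testing', use_header=True)
inst.load(2009, 1)

# Update the fill value for a single variable in both the data and the
# metadata; -999.0 is an arbitrary illustrative choice.
pysat.utils.update_fill_values(inst, variables='mlt', new_fill_val=-999.0)
print(inst.meta['mlt', inst.meta.labels.fill_val])
```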
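Usage sketch for the new coordinate helpers in `pysat.utils.coords`. The latitude grids and the toy xarray Datasets are made up for illustration, and passing an empty `Meta` object means `expand_xarray_dims` falls back to type-based default fill values for variables it does not know about.

```python
import numpy as np
import xarray as xr
import pysat
from pysat.utils import coords

# Two grids with the same resolution but different extents.
lat_coarse = np.arange(-90.0, 91.0, 5.0)
lat_narrow = np.arange(-60.0, 61.0, 5.0)

# Overlapping range only (common=True) vs. the full combined range.
overlap = coords.establish_common_coord([lat_coarse, lat_narrow])
full_range = coords.establish_common_coord([lat_coarse, lat_narrow],
                                            common=False)

# Pad Datasets that share dimension names but not dimension lengths so they
# can be concatenated; fill values come from Meta when available, otherwise
# from the data type.
meta = pysat.Meta()
ds_short = xr.Dataset({'dat': (('time', 'alt'), np.ones((2, 3)))})
ds_long = xr.Dataset({'dat': (('time', 'alt'), np.ones((2, 5)))})
padded = coords.expand_xarray_dims([ds_short, ds_long], meta, dims_equal=True)
```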
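Migration sketch for the deprecated `labels` keyword in the netCDF loaders. The file name is a placeholder for any pysat-written netCDF file, and `combine_by_coords` and `strict_dim_check` only take effect for xarray-formatted data.

```python
import numpy as np
from pysat.utils import io

# The same label dictionary previously passed via `labels`.
custom_labels = {'units': ('units', str), 'name': ('long_name', str),
                 'notes': ('notes', str), 'desc': ('desc', str),
                 'min_val': ('value_min', np.float64),
                 'max_val': ('value_max', np.float64),
                 'fill_val': ('fill', np.float64)}

# Old call, which now emits a DeprecationWarning:
# data, meta = io.load_netcdf('demo.nc', labels=custom_labels)

# New call: pass the same dictionary through `meta_kwargs` instead.
data, meta = io.load_netcdf('demo.nc',
                            meta_kwargs={'labels': custom_labels},
                            combine_by_coords=True, strict_dim_check=True)
```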