Skip to content

Commit

Permalink
Add dataset tests
Browse files Browse the repository at this point in the history
  • Loading branch information
sphamba authored Sep 19, 2023
1 parent e39dc20 commit 2fc2e2b
Show file tree
Hide file tree
Showing 19 changed files with 981 additions and 45 deletions.
15 changes: 15 additions & 0 deletions .coveragerc
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[run]
source =
gpm_api
omit =
*dev*
*docs*
*tutorials*
gpm_api/tests/*
gpm_api/cli/*

[report]
exclude_lines =
pragma: no cover
def __repr__
def __str__
4 changes: 2 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ on: [push, pull_request, workflow_dispatch]

jobs:
tests:
name: ${{ matrix.os }}, Python ${{ matrix.python-version }}${{ matrix.experimental && ', latest deps' || '' }}
name: ${{ matrix.os }}, Python ${{ matrix.python-version }}${{ matrix.experimental && ', latest dependencies' || '' }}
runs-on: ${{ matrix.os }}
defaults:
run:
Expand Down Expand Up @@ -47,7 +47,7 @@ jobs:
- name: Test with pytest
run: |
coverage run -m pytest
pytest
- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
Expand Down
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

# Folders
/dask-worker-space/
gpm_api/tests/dataset/assets

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
48 changes: 26 additions & 22 deletions gpm_api/dataset/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,12 +80,6 @@ def get_coords(dt, scan_mode):
return coords


def _set_attrs_dict(ds, attrs_dict):
"""Set dataset attributes for each attrs_dict key."""
for var in attrs_dict:
ds[var].attrs.update(attrs_dict[var])


def _subset_dict_by_dataset(ds, dictionary):
"""Select the relevant dictionary key for a given dataset."""
# Get dataset coords and variables
Expand Down Expand Up @@ -121,35 +115,45 @@ def get_coords_attrs_dict(ds):
"coverage_content_type": "coordinate",
}

attrs_dict["gpm_granule_id"] = {}
attrs_dict["gpm_granule_id"]["long_name"] = "GPM Granule ID"
attrs_dict["gpm_granule_id"]["description"] = "ID number of the GPM Granule"
attrs_dict["gpm_granule_id"]["coverage_content_type"] = "auxiliaryInformation"
attrs_dict["gpm_granule_id"] = {
"long_name": "GPM Granule ID",
"description": "ID number of the GPM Granule",
"coverage_content_type": "auxiliaryInformation",
}

# Define general attributes for time coordinates
attrs_dict["time"] = {"standard_name": "time", "coverage_content_type": "coordinate"}

# Add description of GPM ORBIT coordinates
attrs_dict["gpm_cross_track_id"] = {}
attrs_dict["gpm_cross_track_id"]["long_name"] = "Cross-Track ID"
attrs_dict["gpm_cross_track_id"]["description"] = "Cross-Track ID."
attrs_dict["gpm_cross_track_id"]["coverage_content_type"] = "auxiliaryInformation"
attrs_dict["gpm_cross_track_id"] = {
"long_name": "Cross-Track ID",
"description": "Cross-Track ID.",
"coverage_content_type": "auxiliaryInformation",
}

attrs_dict["gpm_along_track_id"] = {}
attrs_dict["gpm_along_track_id"]["long_name"] = "Along-Track ID"
attrs_dict["gpm_along_track_id"]["description"] = "Along-Track ID."
attrs_dict["gpm_along_track_id"]["coverage_content_type"] = "auxiliaryInformation"
attrs_dict["gpm_along_track_id"] = {
"long_name": "Along-Track ID",
"description": "Along-Track ID.",
"coverage_content_type": "auxiliaryInformation",
}

attrs_dict["gpm_id"] = {}
attrs_dict["gpm_id"]["long_name"] = "Scan ID"
attrs_dict["gpm_id"]["description"] = "Scan ID. Format: '{gpm_granule_id}-{gpm_along_track_id}'"
attrs_dict["gpm_id"]["coverage_content_type"] = "auxiliaryInformation"
attrs_dict["gpm_id"] = {
"long_name": "Scan ID",
"description": "Scan ID. Format: '{gpm_granule_id}-{gpm_along_track_id}'",
"coverage_content_type": "auxiliaryInformation",
}

# Select required attributes
attrs_dict = _subset_dict_by_dataset(ds, attrs_dict)
return attrs_dict


def _set_attrs_dict(ds, attrs_dict):
    """Set dataset attributes for each attrs_dict key.

    For every variable/coordinate name in ``attrs_dict``, merge the
    corresponding attribute dictionary into ``ds[var].attrs`` in place.
    Existing attributes not listed in ``attrs_dict`` are left untouched.
    """
    for var in attrs_dict:
        # update() merges: keys present in attrs_dict[var] overwrite, others survive
        ds[var].attrs.update(attrs_dict[var])


def set_coords_attrs(ds):
"""Set dataset coordinate attributes."""
# Get attributes dictionary
Expand Down
4 changes: 2 additions & 2 deletions gpm_api/dataset/granule.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def _prefix_dataset_group_variables(ds, group):


def _remove_dummy_variables(ds):
"""Return dummy variables from HDF dataset group."""
"""Remove dummy variables from HDF dataset group."""
dummy_variables = [
"Latitude",
"Longitude",
Expand Down Expand Up @@ -143,7 +143,7 @@ def get_variables_dims(ds):
variables = get_variables(ds)
if len(variables) == 0:
return []
dims = np.unique(np.concatenate([list(ds[var].dims) for var in variables]))
dims = np.unique(np.concatenate([list(ds[var].dims) for var in variables])).tolist()
return dims


Expand Down
8 changes: 7 additions & 1 deletion gpm_api/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import posixpath as pxp
import ntpath as ntp
import gpm_api.configs
import os
from unittest.mock import patch


Expand All @@ -19,7 +20,12 @@ def mock_configuration():
mocked_configuration = {
"gpm_username": "testuser",
"gpm_password": "testuser",
"gpm_base_dir": "data",
"gpm_base_dir": os.path.join(
os.getcwd(),
"gpm_api",
"tests",
"resources",
),
}

with patch.object(
Expand Down
89 changes: 89 additions & 0 deletions gpm_api/tests/dataset/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import numpy as np
import pytest
import h5py
import os
from gpm_api import open_dataset
from gpm_api.dataset.granule import _open_granule

import xarray as xr


class SaneEqualityArray(np.ndarray):
    """Wrapper class for numpy array allowing deep equality tests on objects containing numpy arrays.

    From https://stackoverflow.com/a/14276901
    """

    def __new__(cls, array):
        """Create a new SaneEqualityArray from an array-like.

        Plain Python lists are returned unwrapped, since they already
        support deep equality natively.
        """
        if isinstance(array, list):
            return array
        return np.asarray(array).view(cls)

    def __eq__(self, other):
        """Return a single bool: same type, same shape, same contents.

        NaNs compare equal to each other, but only for floating dtypes
        (``equal_nan`` is invalid for non-float arrays).
        """
        if not isinstance(other, np.ndarray):
            return False
        if self.shape != other.shape:
            return False
        nan_aware = np.issubdtype(self.dtype, np.floating)
        return np.array_equal(self, other, equal_nan=nan_aware)


@pytest.fixture()
def sample_dataset() -> xr.Dataset:
    """Return a sample dataset to use for testing.

    The dataset is a 1C-MHS-METOPB granule (2020-08-01) that has been
    generated with test_granule_creation.py to maintain structure but
    remove data.

    NOTE(review): the original docstring said "2A DPR file from 2022-07-05",
    which did not match the file actually opened below — confirm intent.
    """
    # Path is resolved relative to the current working directory, so the
    # test suite must be run from the repository root.
    granule_path = os.path.join(
        os.getcwd(),
        "gpm_api",
        "tests",
        "resources",
        "GPM",
        "RS",
        "V07",
        "PMW",
        "1C-MHS-METOPB",
        "2020",
        "08",
        "01",
        "1C.METOPB.MHS.XCAL2016-V.20200801-S102909-E121030.040841.V07A.HDF5",
    )
    return _open_granule(granule_path)


def pytest_configure():
    """Expose SaneEqualityArray on the pytest namespace.

    Lets test modules use ``pytest.SaneEqualityArray`` without importing
    it from this conftest module directly.
    """
    pytest.SaneEqualityArray = SaneEqualityArray
Empty file.
34 changes: 34 additions & 0 deletions gpm_api/tests/dataset/decoding/test_attrs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from gpm_api.dataset.decoding import attrs as at
import pytest
import numpy as np


def test_convert_string_to_number() -> None:
    """Test that a string is converted to a number"""

    # (input string, expected numeric value) pairs
    cases = [
        ("1", 1),
        ("1.0", 1.0),
        ("1.0e-3", 1.0e-3),
        ("1.0e3", 1.0e3),
        ("1.0e+3", 1.0e3),
        ("1.0e+03", 1.0e3),
        ("-999", -999),
    ]
    for string, expected in cases:
        assert at.convert_string_to_number(string) == expected

    # Non-numeric input must raise
    with pytest.raises(ValueError):
        assert at.convert_string_to_number("notanumber")


def test_ensure_dtype_name() -> None:
    """Test that a dtype is returned as a string name"""

    # Test with dtype
    assert at.ensure_dtype_name(np.dtype("float32")) == "float32"
    assert at.ensure_dtype_name(np.dtype("int32")) == "int32"
    assert at.ensure_dtype_name(np.dtype("uint8")) == "uint8"

    # Test normal string
    assert at.ensure_dtype_name("notadtype") == "notadtype"

    # Test not a dtype
    # NOTE(review): the TypeError here is raised by np.dtype("float31")
    # itself (invalid dtype name), before ensure_dtype_name is ever
    # called — this does not actually exercise ensure_dtype_name.
    with pytest.raises(TypeError):
        assert at.ensure_dtype_name(np.dtype("float31"))
38 changes: 38 additions & 0 deletions gpm_api/tests/dataset/decoding/test_coordinates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import pytest
from typing import List
from gpm_api.dataset.decoding import coordinates as co


def test_ensure_valid_coords():
    """Placeholder for ensure_valid_coords tests."""
    # TODO: Requires sample ds to work
    pass


def test_get_pmw_frequency_dict() -> None:
    """Test that a dictionary is returned"""

    returned = co.get_pmw_frequency_dict()
    assert isinstance(returned, dict), "Dictionary not returned"


def test_get_pmw_frequency_corra(
    products: List[str],
) -> None:
    """Test get_pmw_frequency_corra for CORRA and non-CORRA products.

    CORRA products must return the concatenated PMW frequency lists of
    their underlying sensors; any other product must raise.
    """
    # Try products hardcoded in function
    res = co.get_pmw_frequency_corra("2B-GPM-CORRA")
    assert len(res) > 0
    assert res == co.get_pmw_frequency("GMI", scan_mode="S1") + co.get_pmw_frequency(
        "GMI", scan_mode="S2"
    )

    res = co.get_pmw_frequency_corra("2B-TRMM-CORRA")
    assert len(res) > 0
    assert res == co.get_pmw_frequency("TMI", scan_mode="S1")

    # Test other non-corra products fail
    # NOTE(review): relying on UnboundLocalError is fragile — it leaks an
    # implementation detail; a ValueError would be a better contract.
    for product in products:
        if "corra" not in product.lower():
            with pytest.raises(UnboundLocalError):
                co.get_pmw_frequency_corra(product)
            # (removed unreachable `assert len(res) == 0`: the call above
            # always raises, and `res` would have held a stale value anyway)
11 changes: 11 additions & 0 deletions gpm_api/tests/dataset/generate_test_hdf5_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os


# Directory (relative to this script) where generated HDF5 test assets are stored
assets_dir_path = "assets"

# Change current working directory to the directory of this script
os.chdir(os.path.dirname(os.path.abspath(__file__)))

# Create the assets directory.
# exist_ok=True avoids the check-then-create race of the previous
# `if not os.path.exists(...)` guard and is a no-op if it already exists.
os.makedirs(assets_dir_path, exist_ok=True)
Loading

0 comments on commit 2fc2e2b

Please sign in to comment.