Skip to content

Commit

Permalink
Updates for numpy 2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
ghiggi committed Jun 26, 2024
1 parent a817363 commit c96822a
Show file tree
Hide file tree
Showing 16 changed files with 81 additions and 52 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,11 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
experimental: [false]
include:
- os: ubuntu-latest
python-version: "3.11"
python-version: "3.12"
experimental: true
- os: ubuntu-latest
python-version: "3.11"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/tests_windows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
os: [windows-latest]
python-version: ["3.9", "3.10", "3.11"]
python-version: ["3.9", "3.10", "3.11", "3.12"]
experimental: [false]
steps:
- uses: actions/checkout@v4
Expand Down
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.5.0
ci:
- autofix_prs: false
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand Down
2 changes: 1 addition & 1 deletion docs/source/00_introduction.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ If you plan to use PR measurements, it's essential to consider the satellite orb
Following the boost, the swath coverage has widened to 220 km but the spatial resolution of PR footprints has increased from 4.3 km to 5 km
and the PR sensitivity has reduced by more than 1 dBZ.

Please note that PR routine operations has ended on October 7th, 2014, although PR data are still available till January 15th 2015
Please note that PR routine operations have ended on October 7th, 2014, although PR data are still available till January 15th, 2015
while TRMM descended to the decommissioning altitude of 335 km.


Expand Down
14 changes: 9 additions & 5 deletions gpm/tests/test_bucket/test_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,9 @@ def test_without_partitions(self, tmp_path):
partitions=None,
)
# Assert structure
assert os.listdir(tmp_path) == ["prefix_dask_partition_0_0.parquet", "prefix_dask_partition_1_0.parquet"]
assert sorted(os.listdir(tmp_path)) == sorted(
["prefix_dask_partition_0_0.parquet", "prefix_dask_partition_1_0.parquet"],
)
parquet_file = pq.ParquetFile(os.path.join(tmp_path, "prefix_dask_partition_1_0.parquet"))
assert parquet_file.metadata.row_group(0).num_rows == 26
assert parquet_file.metadata.num_rows == 26
Expand Down Expand Up @@ -346,10 +348,12 @@ def test_with_partitions(self, tmp_path):
row_group_size="100MB", # enforce computation of first dask partition
)
# Assert generated files
assert os.listdir(os.path.join(tmp_path, "0")) == [
"prefix_dask_partition_0_0.parquet",
"prefix_dask_partition_1_0.parquet",
]
assert os.listdir(os.path.join(tmp_path, "0")) == sorted(
[
"prefix_dask_partition_0_0.parquet",
"prefix_dask_partition_1_0.parquet",
],
)
# Assert can be read with dask using metadata
df = read_dask_partitioned_dataset(base_dir=tmp_path)
assert isinstance(df.compute(), pd.DataFrame)
Expand Down
2 changes: 1 addition & 1 deletion gpm/tests/test_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ def test_is_orbit(
n_y = 20
x = np.arange(n_x)
y = np.arange(n_y)
data = np.random.rand(n_x, n_y)
data = np.random.default_rng().random((n_x, n_y))
invalid_da = xr.DataArray(data, coords={"x": x, "y": y})
assert not is_orbit(invalid_da)

Expand Down
19 changes: 11 additions & 8 deletions gpm/tests/test_dataset/test_coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@

def get_random_datetime_array_and_dataset(n_values):
"""Return random datetimes as numpy array and xarrray dataset."""
timestamps = np.random.randint(0, MAX_TIMESTAMP, size=n_values)
rng = np.random.default_rng()
timestamps = rng.integers(0, MAX_TIMESTAMP, size=n_values)
datetimes = pd.to_datetime(timestamps, unit="s")
ds = xr.Dataset(
{
Expand All @@ -65,12 +66,14 @@ def get_random_datetime_array_and_dataset(n_values):
def test_get_orbit_coords():
"""Test get_orbit_coords."""
scan_mode = "S1"
granule_id = np.random.randint(0, 100000)
shape = (10, 3)
rng = np.random.default_rng()
granule_id = rng.integers(0, 100000)

# Create random datatree
lon = xr.DataArray(np.random.rand(*shape), dims=["along_track", "cross_track"])
lat = xr.DataArray(np.random.rand(*shape), dims=["along_track", "cross_track"])
rng = np.random.default_rng()
lon = xr.DataArray(rng.random(shape), dims=["along_track", "cross_track"])
lat = xr.DataArray(rng.random(shape), dims=["along_track", "cross_track"])
time_array, time_ds = get_random_datetime_array_and_dataset(shape[0])

dt = DataTree.from_dict({scan_mode: DataTree.from_dict({"ScanTime": time_ds})})
Expand Down Expand Up @@ -101,10 +104,10 @@ def test_get_grid_coords():
n_values = 10

# Create random datatree
# time = np.random.randint(0, MAX_TIMESTAMP)
lon = np.random.rand(n_values)
lat = np.random.rand(n_values)
timestamp = np.random.randint(0, MAX_TIMESTAMP)
rng = np.random.default_rng()
lon = rng.random(n_values)
lat = rng.random(n_values)
timestamp = rng.integers(0, MAX_TIMESTAMP)
time_formated = datetime.fromtimestamp(timestamp).strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

ds = xr.Dataset()
Expand Down
10 changes: 6 additions & 4 deletions gpm/tests/test_dataset/test_granule.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,10 @@ def construct_dataset_and_check_validation(input_datetimes, expected_datetimes):

def get_sample_orbit_dataset():
# Return a default dataset
da = xr.DataArray(np.random.rand(1, 2, 2), dims=("other", "along_track", "cross_track"))
lon = xr.DataArray(np.random.rand(2, 2), dims=("cross_track", "along_track"))
lat = xr.DataArray(np.random.rand(2, 2), dims=("cross_track", "along_track"))
rng = np.random.default_rng()
da = xr.DataArray(rng.random((1, 2, 2)), dims=("other", "along_track", "cross_track"))
lon = xr.DataArray(rng.random((2, 2)), dims=("cross_track", "along_track"))
lat = xr.DataArray(rng.random((2, 2)), dims=("cross_track", "along_track"))
start_time = datetime(2018, 1, 1, 12, 30, 0)
end_time = datetime(2018, 1, 1, 12, 32, 0)
time = [start_time, end_time]
Expand All @@ -328,7 +329,8 @@ def get_sample_orbit_dataset():

def get_sample_grid_dataset():
# Return a default dataset
da = xr.DataArray(np.random.rand(1, 1, 1, 1), dims=("lat", "lon", "time", "other"))
rng = np.random.default_rng()
da = xr.DataArray(rng.random((1, 1, 1, 1)), dims=("lat", "lon", "time", "other"))
lon = xr.DataArray([1], dims=("lon"))
lat = xr.DataArray([2], dims=("lat"))
time = [0]
Expand Down
3 changes: 2 additions & 1 deletion gpm/tests/test_utils/test_manipulations.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,8 @@ def test_get_bright_band_mask() -> None:
class TestGetPhaseMask:
"""Test get_liquid_phase_mask and get_solid_phase_mask functions."""

height_zero_deg = np.random.randint(3, 6, size=(5, 6)) * 8
rng = np.random.default_rng()
height_zero_deg = rng.integers(3, 6, size=(5, 6)) * 8
da_height_zero_deg = xr.DataArray(height_zero_deg, dims=["cross_track", "along_track"])

@pytest.fixture()
Expand Down
19 changes: 13 additions & 6 deletions gpm/tests/test_utils/test_time.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,8 @@ class TestSubsetByTime:

@pytest.fixture()
def data_array(self) -> xr.DataArray:
return xr.DataArray(np.random.rand(len(self.time)), coords={"time": self.time})
rng = np.random.default_rng()
return xr.DataArray(rng.random(len(self.time)), coords={"time": self.time})

def test_no_subset(self, data_array: xr.DataArray) -> None:
returned_da = subset_by_time(data_array, start_time=None, end_time=None)
Expand Down Expand Up @@ -130,7 +131,9 @@ def test_no_dimension(self):
def test_wrong_time_dimension(self):
lat = np.arange(5)
lon = np.arange(5)
time = np.random.rand(len(lat), len(lon)) * 1e9
shape = len(lat), len(lon)
rng = np.random.default_rng()
time = rng.random(shape) * 1e9
time = np.array(time, dtype="datetime64[ns]")
da = xr.DataArray(time, coords=[("lat", lat), ("lon", lon)])
ds = xr.Dataset({"time": da})
Expand All @@ -156,7 +159,8 @@ def test_empty_subsets(
def test_subset_by_time_slice():
"""Test subset_by_time_slice."""
time = get_time_range(0, 23)
da = xr.DataArray(np.random.rand(len(time)), coords={"time": time})
rng = np.random.default_rng()
da = xr.DataArray(rng.random(len(time)), coords={"time": time})
start_time = datetime.datetime(2020, 12, 31, 6, 0, 0)
end_time = datetime.datetime(2020, 12, 31, 18, 0, 0)
time_slice = slice(start_time, end_time)
Expand Down Expand Up @@ -267,7 +271,8 @@ class TestEnsureTimeValidity:
expected_time = create_fake_datetime_array_from_hours_list([1, 2, 3, 4, 5, 6, 7])

def test_with_time_in_dims(self) -> None:
da = xr.DataArray(np.random.rand(len(self.time)), coords={"time": self.time})
rng = np.random.default_rng()
da = xr.DataArray(rng.random(len(self.time)), coords={"time": self.time})
returned_da = ensure_time_validity(da, limit=5)
np.testing.assert_equal(self.expected_time, returned_da["time"])

Expand All @@ -281,7 +286,8 @@ def test_without_time_in_dims(self) -> None:
def create_test_dataset():
"""Create a mock xarray.Dataset for testing."""
times = pd.date_range("2023-01-01", periods=10, freq="D")
data = np.random.rand(10, 2, 2) # Random data for the sake of example
rng = np.random.default_rng()
data = rng.random((10, 2, 2)) # Random data for the sake of example
return xr.Dataset({"my_data": (("time", "x", "y"), data)}, coords={"time": times})


Expand All @@ -304,7 +310,8 @@ def test_get_dataset_start_end_time():
def test_regularize_dataset():
# Create a sample Dataset
times = pd.date_range("2020-01-01", periods=4, freq="2min")
data = np.random.rand(4)
rng = np.random.default_rng()
data = rng.random(4)
ds = xr.Dataset({"data": ("time", data)}, coords={"time": times})

# Regularize the dataset
Expand Down
5 changes: 3 additions & 2 deletions gpm/tests/test_utils/test_utils_checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,9 @@ def test_get_along_track_scan_distance() -> None:
lon = np.array([0, 45, 90])

# Stack values for cross track dimension
lat = np.stack((np.random.rand(3), lat, np.random.rand(3)))
lon = np.stack((np.random.rand(3), lon, np.random.rand(3)))
rng = np.random.default_rng()
lat = np.stack((rng.random(3), lat, rng.random(3)))
lon = np.stack((rng.random(3), lon, rng.random(3)))

# Create dataset
ds = xr.Dataset()
Expand Down
26 changes: 17 additions & 9 deletions gpm/tests/test_utils/test_xarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ def test_get_xarray_variable() -> None:

def test_get_dimensions_without() -> None:
"""Test get_dimensions_without function."""
dataarray_3d = xr.DataArray(np.random.rand(2, 2, 2), dims=["range", "cross_track", "along_track"])
rng = np.random.default_rng()
data = rng.random((2, 2, 2))
dataarray_3d = xr.DataArray(data, dims=["range", "cross_track", "along_track"])
# Test with list input
returned_dims = get_dimensions_without(dataarray_3d, ["cross_track", "range"])
expected_dims = ["along_track"]
Expand All @@ -137,10 +139,11 @@ def test_get_dimensions_without() -> None:
def test_ensure_dim_order_dataarray():
"""Test the ensure_dim_order_dataarray function."""
# Create a sample DataArray
data = np.random.rand(2, 3, 4)
rng = np.random.default_rng()
data = rng.random((2, 3, 4))
coords = {"time": [1, 2], "lat": [1, 2, 3], "lon": [10, 20, 30, 40]}
da = xr.DataArray(data, coords=coords, dims=("time", "lat", "lon"))
da = da.assign_coords({"height": (("lon", "lat"), np.random.rand(4, 3))}) # coord with different dim order
da = da.assign_coords({"height": (("lon", "lat"), rng.random((4, 3)))}) # coord with different dim order

# Test with dimensions which are removed
def remove_dimension(da):
Expand Down Expand Up @@ -170,10 +173,11 @@ def bad_function(da):
def test_ensure_dim_order_dataset():
"""Test the ensure_dim_order_dataset function."""
# Create sample DataArray
data = np.random.rand(2, 3, 4)
rng = np.random.default_rng()
data = rng.random((2, 3, 4))
coords = {"time": [1, 2], "lat": [1, 2, 3], "lon": [10, 20, 30, 40]}
da1 = xr.DataArray(data, coords=coords, dims=("time", "lat", "lon"))
da1 = da1.assign_coords({"height": (("lon", "lat"), np.random.rand(4, 3))})
da1 = da1.assign_coords({"height": (("lon", "lat"), rng.random((4, 3)))})
da2 = da1.copy().transpose("lon", "time", "lat")
ds = xr.Dataset({"var1": da1, "var2": da2})

Expand Down Expand Up @@ -207,7 +211,8 @@ def bad_function(da):
def test_xr_ensure_dimension_order():
"""Test the decorator xr_ensure_dimension_order."""
# Create a sample DataArray
data = np.random.rand(2, 3, 4)
rng = np.random.default_rng()
data = rng.random((2, 3, 4))
coords = {"time": [1, 2], "lat": [1, 2, 3], "lon": [10, 20, 30, 40]}
da = xr.DataArray(data, coords=coords, dims=("time", "lat", "lon"))

Expand All @@ -222,7 +227,8 @@ def custom_func(da):
def test_squeeze_unsqueeze_dataarray():
"""Test the squeeze_unsqueeze_dataarray function."""
# Create a sample DataArray
data = np.random.rand(3, 4, 1, 1)
rng = np.random.default_rng()
data = rng.random((3, 4, 1, 1))
coords = {
"lat": [1, 2, 3],
"lon": [10, 20, 30, 40],
Expand Down Expand Up @@ -263,7 +269,8 @@ def bad_function(da):
def test_squeeze_unsqueeze_dataset():
"""Test the squeeze_unsqueeze_dataset function."""
# Create a sample DataArray
data = np.random.rand(3, 4, 1, 1)
rng = np.random.default_rng()
data = rng.random((3, 4, 1, 1))
coords = {
"lat": [1, 2, 3],
"lon": [10, 20, 30, 40],
Expand Down Expand Up @@ -302,7 +309,8 @@ def bad_function(ds):
def test_xr_squeeze_unsqueeze():
"""Test the decorator xr_squeeze_unsqueeze."""
# Create a sample DataArray
data = np.random.rand(3, 4, 1, 1)
rng = np.random.default_rng()
data = rng.random((3, 4, 1, 1))
coords = {
"lat": [1, 2, 3],
"lon": [10, 20, 30, 40],
Expand Down
6 changes: 4 additions & 2 deletions gpm/tests/test_visualization/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,8 @@ def orbit_labels_dataarray(
orbit_dataarray: xr.DataArray,
) -> xr.DataArray:
"""Create an orbit data array with label coordinates."""
labels = np.random.randint(0, 10, orbit_dataarray.shape)
rng = np.random.default_rng(seed=0)
labels = rng.integers(0, 10, size=orbit_dataarray.shape)
return orbit_dataarray.assign_coords(
{self.label_name: (("cross_track", "along_track"), labels)},
)
Expand All @@ -770,7 +771,8 @@ def grid_labels_dataarray(
grid_dataarray: xr.DataArray,
) -> xr.DataArray:
"""Create a grid data array with label coordinates."""
labels = np.random.randint(0, 10, grid_dataarray.shape)
rng = np.random.default_rng(seed=0)
labels = rng.integers(0, 10, size=grid_dataarray.shape)
return grid_dataarray.assign_coords({self.label_name: (("lat", "lon"), labels)})

def test_orbit(
Expand Down
4 changes: 2 additions & 2 deletions gpm/tests/test_visualization/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,6 @@ def expand_dims(
) -> xr.DataArray:
"""Expand dimensions of a dataarray and fill with random data."""
dataarray = dataarray.expand_dims(dim={dim: size}, axis=axis)
np.random.seed(0)
dataarray.data = np.random.rand(*dataarray.data.shape)
rng = np.random.default_rng(seed=0)
dataarray.data = rng.random(dataarray.data.shape)
return dataarray
8 changes: 4 additions & 4 deletions gpm/tests/utils/fake_datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,8 +80,8 @@ def get_orbit_dataarray(
n_range=0,
) -> xr.DataArray:
"""Create orbit data array on geodesic band."""
np.random.seed(0)
data = np.random.rand(n_cross_track, n_along_track)
rng = np.random.default_rng(seed=0)
data = rng.random((n_cross_track, n_along_track))
granule_id = np.zeros(n_along_track)
cross_track_id = np.arange(0, n_cross_track)
along_track_id = np.arange(0, n_along_track)
Expand Down Expand Up @@ -127,10 +127,10 @@ def get_grid_dataarray(
n_lat: int,
) -> xr.DataArray:
"""Create grid data array."""
np.random.seed(0)
lon = np.linspace(start_lon, end_lon, n_lon)
lat = np.linspace(start_lat, end_lat, n_lat)
data = np.random.rand(n_lat, n_lon)
rng = np.random.default_rng(seed=0)
data = rng.random((n_lat, n_lon))

# Create data array
return xr.DataArray(data, coords={"lat": lat, "lon": lon})
7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -180,14 +180,13 @@ select = [
# flake8-comprehensions
"C4",
# Ruff custom rules
"RUF"

"RUF",
# NumPy-specific rules (for 2.0)
"NPY",
#---------------------
#### Future rules ####
# flake8-use-pathlib
# "PTH",
# NumPy-specific rules (for 2.0)
# "NPY",
# refurb
# "FURB", # require preview

Expand Down

0 comments on commit c96822a

Please sign in to comment.