Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added icon to nwp providers #72

Open
wants to merge 7 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion ocf_data_sampler/load/nwp/nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from ocf_data_sampler.load.nwp.providers.ukv import open_ukv
from ocf_data_sampler.load.nwp.providers.ecmwf import open_ifs

from ocf_data_sampler.load.nwp.providers.icon import open_icon_eu

def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> xr.DataArray:
"""Opens NWP Zarr
Expand All @@ -17,6 +17,8 @@ def open_nwp(zarr_path: Path | str | list[Path] | list[str], provider: str) -> x
_open_nwp = open_ukv
elif provider.lower() == "ecmwf":
_open_nwp = open_ifs
elif provider.lower() == "icon-eu":
_open_nwp = open_icon_eu
else:
raise ValueError(f"Unknown provider: {provider}")
return _open_nwp(zarr_path)
Expand Down
101 changes: 101 additions & 0 deletions ocf_data_sampler/load/nwp/providers/icon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""DWD ICON Loading"""

import logging

import fsspec
import pandas as pd
import xarray as xr

from ocf_data_sampler.load.nwp.providers.utils import open_zarr_paths

def transform_to_channels(nwp : xr.Dataset):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I right in thinking that the input here is an xarray Dataset which has multiple data variables for each NWP variable and we want to go from that to a DataArray (e.g. one data variable but an extra channel dimension?)

I think a simpler approach might be to do something like what is done here https://github.com/openclimatefix/ocf_datapipes/blob/main/ocf_datapipes/load/nwp/providers/gfs.py#L26 where we use to_array() on the Dataset to convert it to a DataArray and then rename the variable dimension which is created with to_array() to channel

But I may have misunderstood the intention/need for this function

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are perfectly right, I deleted this and use to_array() instead, thx for pointing it out!

"""
Stacks the data variables of the NWP data along a new "channel" dimension

Each data variable is expanded with a length-1 "channel" dimension labelled
with the variable's name, and the expanded arrays are then concatenated
along "channel".

Args:
nwp: NWP data with one data variable per NWP variable and no channel dimension

Returns:
NWP data with a "channel" dimension whose coordinate holds the variable names
"""

# Per-variable arrays (each with a length-1 "channel" dim) and their names
channel_data = []
channel_names = []

for var_name in nwp.data_vars:
data_array = nwp[var_name]
# Add a "channel" dimension of size one, labelled with the variable name
expanded_data = data_array.expand_dims(dim={"channel": [var_name]})

channel_data.append(expanded_data)
channel_names.append(var_name)

# Join all variables into a single array along the "channel" dimension
nwp_channels = xr.concat(channel_data, dim="channel")
# Set the "channel" coordinate to the collected variable names
nwp_channels["channel"] = channel_names

return nwp_channels

def remove_isobaric_lelvels_from_coords(nwp: xr.Dataset) -> xr.Dataset:
    """Drop the isobaric-level coordinate and every variable defined on it.

    Args:
        nwp: NWP data, possibly containing data variables that have an
            "isobaricInhPa" dimension

    Returns:
        NWP data without the "isobaricInhPa" coordinate and without any data
        variable that has "isobaricInhPa" among its dimensions
    """
    pressure_level_vars = [
        name for name, var in nwp.data_vars.items() if "isobaricInhPa" in var.dims
    ]
    # errors="ignore": a dataset holding only surface variables may have no
    # "isobaricInhPa" coordinate at all, and dropping a missing name would
    # otherwise raise instead of returning the data unchanged.
    return nwp.drop_vars(["isobaricInhPa", *pressure_level_vars], errors="ignore")

def open_icon_eu(zarr_path) -> xr.DataArray:
    """Open ICON-EU NWP data as a single DataArray with a channel dimension.

    ICON-EU data is on a regular lat/lon grid. It has data on multiple
    pressure levels as well as the surface, and each variable is stored as its
    own data variable. Variables defined on isobaric levels are dropped and
    the remaining variables are stacked along a "channel" dimension.

    Args:
        zarr_path: Path (or list of paths) to the zarr store(s) to open

    Returns:
        DataArray with dimensions
        (init_time_utc, step, channel, latitude, longitude)

    Raises:
        AssertionError: If the init times are not unique or not monotonically
            increasing
    """
    nwp = open_zarr_paths(zarr_path, time_dim="time")
    nwp = nwp.rename({"time": "init_time_utc"})

    # Sanity checks on the init-time axis
    init_times = pd.DatetimeIndex(nwp.init_time_utc)
    assert init_times.is_unique, "ICON-EU init times are not unique"
    assert init_times.is_monotonic_increasing, (
        "ICON-EU init times are not monotonically increasing"
    )

    # Keep only the first 48 entries along the step dimension
    nwp = nwp.isel(step=slice(0, 48))
    nwp = remove_isobaric_lelvels_from_coords(nwp)
    nwp = transform_to_channels(nwp)
    nwp = nwp.transpose("init_time_utc", "step", "channel", "latitude", "longitude")

    # Library code should not write to stdout; emit a debug log record instead
    logging.getLogger(__name__).debug(
        "Loaded ICON EU data with shape %s", nwp.shape
    )
    return nwp


def open_icon_global(zarr_path) -> xr.Dataset:
    """Open ICON Global NWP data.

    ICON Global data is on an icosahedral grid, so the points are not
    regularly spaced. It has data on multiple pressure levels as well as the
    surface, and each variable is stored as its own data variable.

    Args:
        zarr_path: Path to the zarr to open

    Returns:
        Xarray Dataset of the NWP data, with "latitude" and "longitude"
        attached to the "values" dimension
    """
    dataset = open_zarr_paths(zarr_path, time_dim="time").rename(
        {"time": "init_time_utc"}
    )

    # The ICON Global archive script did not associate the lat/lon values with
    # the "values" dimension, so that is fixed up here
    for coord_name in ("latitude", "longitude"):
        dataset.coords[coord_name] = (("values",), dataset[coord_name].values)

    # Sanity checks on the init-time axis
    init_times = pd.DatetimeIndex(dataset.init_time_utc)
    assert init_times.is_unique
    assert init_times.is_monotonic_increasing

    return dataset
5 changes: 5 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
def test_config_filename():
return f"{_top_test_directory}/test_data/configs/test_config.yaml"

@pytest.fixture()
def icon_eu_zarr_path():
    """Return the paths of the sample ICON-EU zarr archives, sorted by name.

    Collects every "*.zarr.zip" file in the "test_data/icon" directory.
    """
    icon_sample_data_dir = f"{_top_test_directory}/test_data/icon"
    # os.listdir returns entries in arbitrary order; sort so the fixture is
    # deterministic. The sample files are named like "20211101_00.zarr.zip",
    # so name order presumably matches init-time order.
    return sorted(
        os.path.join(icon_sample_data_dir, file_name)
        for file_name in os.listdir(icon_sample_data_dir)
        if file_name.endswith(".zarr.zip")
    )


@pytest.fixture(scope="session")
def config_filename():
Expand Down
8 changes: 8 additions & 0 deletions tests/load/test_load_nwp.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,12 @@ def test_load_ecmwf(nwp_ecmwf_zarr_path):
assert isinstance(da, DataArray)
assert da.dims == ("init_time_utc", "step", "channel", "longitude", "latitude")
assert da.shape == (24 * 7, 15, 3, 15, 12)
assert np.issubdtype(da.dtype, np.number)


def test_load_icon_eu(icon_eu_zarr_path):
    """Check that the ICON-EU sample data loads as a numeric DataArray with the expected layout."""
    expected_dims = ("init_time_utc", "step", "channel", "latitude", "longitude")
    expected_shape = (2, 48, 2, 100, 100)

    icon_data = open_nwp(zarr_path=icon_eu_zarr_path, provider="icon-eu")

    assert isinstance(icon_data, DataArray)
    assert icon_data.dims == expected_dims
    assert icon_data.shape == expected_shape
    assert np.issubdtype(icon_data.dtype, np.number)
Binary file added tests/test_data/icon/20211101_00.zarr.zip
Binary file not shown.
Binary file added tests/test_data/icon/20211101_06.zarr.zip
Binary file not shown.