Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
emfdavid committed Nov 22, 2023
1 parent 0b0de06 commit 66ba4f8
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 11 deletions.
27 changes: 18 additions & 9 deletions kerchunk/grib2.py
Original file line number Diff line number Diff line change
Expand Up @@ -367,14 +367,23 @@ def grib_tree(
remote_options=None,
) -> dict:
"""
Intended to consume scan grib results for a single file
:param message_groups:
:param concat_dims:
:param identical_dims:
:param remote_options:
:return:
Create a single zarr hierarchy from a grib file with multiple levels. MultiZarrToZarr aggregation requires
picking filters for a particular set of GRIB levels. This method allows reading an entire grib file into
a zarr hierarchy determined by the levels contained in the grib file.
Create a zarr store group hierarchy from a collection of grib messages decoded as zarr-store-like
references.
Each grib message group is expected to have one data variable and several coordinate variables.
Data variables are expected to have attributes "GRIB_stepType" and "GRIB_typeOfLevel" which are used to
define the nested groups.
Grib message variable names that decode as "unknown" are dropped
Grib typeOfLevel attributes that decode as unknown are treated as a single group
:param message_groups: a collection of zarr store like dictionaries as produced by scan_grib
:param concat_dims: dimensions to concatenate
:param identical_dims: dimensions known to be identical
:param remote_options: remote options to pass to MultiZarrToZarr
:return: A new zarr store like dictionary for use as a reference filesystem mapper
"""

# Why can't we import this at the top of the module?
from kerchunk.combine import MultiZarrToZarr

Expand All @@ -392,7 +401,7 @@ def grib_tree(
for msg_ind, group in enumerate(message_groups):
if "version" not in result:
result["version"] = group["version"]
# result["templates"] = group["templates"]
# Drop the templates

gattrs = ujson.loads(group["refs"][".zattrs"])
coordinates = gattrs["coordinates"].split(" ")
Expand Down Expand Up @@ -444,7 +453,7 @@ def grib_tree(
# Add an attribute to give context
zgroup.attrs[key] = value

# add to the list of groups to multizarr
# add to the list of groups to multi-zarr
aggregations[zgroup.path].append(group)

# keep track of the dimensions and their values
Expand Down
16 changes: 14 additions & 2 deletions kerchunk/tests/test_grib.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import numpy as np
import pytest
import xarray as xr

from kerchunk.grib2 import scan_grib, _split_file, GribToZarr
import zarr
from kerchunk.grib2 import scan_grib, _split_file, GribToZarr, grib_tree

cfgrib = pytest.importorskip("cfgrib")
here = os.path.dirname(__file__)
Expand Down Expand Up @@ -83,3 +83,15 @@ def test_subhourly():
fpath = os.path.join(here, "hrrr.wrfsubhf.sample.grib2")
result = scan_grib(fpath)
assert len(result) == 2, "Expected two grib messages"


def test_grib_tree():
    """Check that grib_tree yields a zarr hierarchy with the expected arrays.

    Scans the sample GRIB2 file, merges the scanned message groups with
    ``grib_tree``, opens the result through an fsspec "reference" filesystem
    and verifies that the two expected nested paths resolve to zarr arrays.
    """
    fpath = os.path.join(here, "hrrr.wrfsubhf.sample.grib2")
    scanned_msg_groups = scan_grib(fpath)
    result = grib_tree(
        scanned_msg_groups, concat_dims=["valid_time"], identical_dims=["time"]
    )
    fs = fsspec.filesystem("reference", fo=result)
    zg = zarr.open_group(fs.get_mapper(""))
    # The isinstance results must be asserted; a bare call is evaluated and
    # discarded, so the test would pass even if these nodes were groups.
    assert isinstance(zg["instant/atmosphere/refc"], zarr.Array)
    assert isinstance(zg["avg/surface/vbdsf"], zarr.Array)

0 comments on commit 66ba4f8

Please sign in to comment.