diff --git a/CHANGELOG.md b/CHANGELOG.md index 8769af4f..f42b76c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Added support for appending a dataset of references. @mavaylon1 [#203](https://github.com/hdmf-dev/hdmf-zarr/pull/203) * NWBZarrIO load_namespaces=True by default. @mavaylon1 [#204](https://github.com/hdmf-dev/hdmf-zarr/pull/204) * Added test for opening file with consolidated metadata from DANDI. @mavaylon1 [#206](https://github.com/hdmf-dev/hdmf-zarr/pull/206) +* Add dimension labels compatible with xarray. @mavaylon1 [#207](https://github.com/hdmf-dev/hdmf-zarr/pull/207) ## 0.8.0 (June 4, 2024) ### Bug Fixes diff --git a/pyproject.toml b/pyproject.toml index 7c029a6e..c52a5100 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,9 +29,9 @@ classifiers = [ "Topic :: Scientific/Engineering :: Medical Science Apps." ] dependencies = [ - 'hdmf>=3.14.2', + 'hdmf>=3.14.3', 'zarr>=2.11.0, <3.0', # pin below 3.0 until HDMF-zarr supports zarr 3.0 - 'numpy>=1.24, <2.0', # pin below 2.0 until HDMF supports numpy 2.0 + 'numpy>=1.24, <2.0', # pin below 2.0 until HDMF-zarr supports numpy 2.0 'numcodecs>=0.9.1', 'pynwb>=2.5.0', 'threadpoolctl>=3.1.0', diff --git a/src/hdmf_zarr/backend.py b/src/hdmf_zarr/backend.py index 0aa33e29..a70e2e45 100644 --- a/src/hdmf_zarr/backend.py +++ b/src/hdmf_zarr/backend.py @@ -962,6 +962,9 @@ def write_dataset(self, **kwargs): # noqa: C901 else: options['io_settings'] = {} + if builder.dimension_labels is not None: + builder.attributes['_ARRAY_DIMENSIONS'] = builder.dimension_labels + attributes = builder.attributes options['dtype'] = builder.dtype diff --git a/tests/unit/test_zarrio.py b/tests/unit/test_zarrio.py index 174effb2..533557cd 100644 --- a/tests/unit/test_zarrio.py +++ b/tests/unit/test_zarrio.py @@ -19,6 +19,8 @@ from tests.unit.utils import (Baz, BazData, BazBucket, get_baz_buildmanager) import zarr from hdmf_zarr.backend import ZarrIO +from .utils import BuildDatasetShapeMixin, BarData, 
class TestDimensionLabels(BuildDatasetShapeMixin):
    """
    Test that the dimension labels of a dataset are written as the zarr attribute '_ARRAY_DIMENSIONS'.

    Workflow:
    i) Define `get_base_shape_dims` and `get_dataset_inc_spec` so that the BarData spec created by
       BuildDatasetShapeMixin carries the dims.
    ii) Create and write a BarDataHolder with a BarData.
    iii) Read the file back and check that the _ARRAY_DIMENSIONS attribute is set.
    """

    def tearDown(self):
        """Remove the zarr store written by the test, if it exists."""
        # The store may not exist if the test failed before or during the write; guard the
        # removal so that cleanup does not raise a FileNotFoundError on top of the real failure.
        if os.path.exists(self.store):
            shutil.rmtree(self.store)

    def get_base_shape_dims(self):
        """Return the (shape, dims) pair for the BarData spec: two unbounded axes labeled 'a' and 'b'."""
        return [None, None], ['a', 'b']

    def get_dataset_inc_spec(self):
        """Return the DatasetSpec with which BarDataHolder includes BarData datasets (quantity '*')."""
        return DatasetSpec(
            doc='A BarData',
            data_type_inc='BarData',
            quantity='*',
        )

    def test_build(self):
        """Write a BarDataHolder with one 2D BarData and check '_ARRAY_DIMENSIONS' after reading it back."""
        bar_data_inst = BarData(name='my_bar', data=[[1, 2, 3], [4, 5, 6]], attr1='a string')
        bar_data_holder_inst = BarDataHolder(
            name='my_bar_holder',
            bar_datas=[bar_data_inst],
        )

        with ZarrIO(self.store, manager=self.manager, mode='w') as io:
            io.write(bar_data_holder_inst)

        with ZarrIO(self.store, manager=self.manager, mode='r') as io:
            read_holder = io.read()
            # The labels must match the dims returned by get_base_shape_dims.
            self.assertEqual(read_holder.bar_datas[0].data.attrs['_ARRAY_DIMENSIONS'], ['a', 'b'])
class BarData(Data):
    """Test Data type with two optional attributes (attr1, attr2) and a boolean ext_attr."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarData'},
            {'name': 'data', 'type': ('data', 'array_data'), 'doc': 'the data'},
            {'name': 'attr1', 'type': str, 'doc': 'a string attribute', 'default': None},
            {'name': 'attr2', 'type': 'int', 'doc': 'an int attribute', 'default': None},
            {'name': 'ext_attr', 'type': bool, 'doc': 'a boolean attribute', 'default': True})
    def __init__(self, **kwargs):
        name, data, attr1, attr2, ext_attr = getargs('name', 'data', 'attr1', 'attr2', 'ext_attr', kwargs)
        super().__init__(name=name, data=data)
        self.__attr1 = attr1
        self.__attr2 = attr2
        # Use the value already unpacked by getargs instead of indexing kwargs a second time.
        self.__ext_attr = ext_attr

    @property
    def data_type(self):
        """The name of the data type, 'BarData'."""
        return 'BarData'

    @property
    def attr1(self):
        """The string attribute given at construction (None if not given)."""
        return self.__attr1

    @property
    def attr2(self):
        """The int attribute given at construction (None if not given)."""
        return self.__attr2

    @property
    def ext_attr(self):
        """The boolean attribute given at construction (True if not given)."""
        return self.__ext_attr


class BarDataHolder(Container):
    """Test Container that holds a collection of BarData objects."""

    @docval({'name': 'name', 'type': str, 'doc': 'the name of this BarDataHolder'},
            {'name': 'bar_datas', 'type': ('data', 'array_data'), 'doc': 'bar_datas', 'default': list()})
    def __init__(self, **kwargs):
        name, bar_datas = getargs('name', 'bar_datas', kwargs)
        super().__init__(name=name)
        self.__bar_datas = bar_datas
        # Adopt every BarData that does not already have a parent.
        for b in bar_datas:
            if b is not None and b.parent is None:
                b.parent = self

    @property
    def data_type(self):
        """The name of the data type, 'BarDataHolder'."""
        return 'BarDataHolder'

    @property
    def bar_datas(self):
        """The BarData objects given at construction."""
        return self.__bar_datas


class ExtBarDataMapper(ObjectMapper):
    """ObjectMapper for BarData with a custom mapping for the 'ext_attr' attribute."""

    @docval({"name": "spec", "type": Spec, "doc": "the spec to get the attribute value for"},
            {"name": "container", "type": BarData, "doc": "the container to get the attribute value from"},
            {"name": "manager", "type": BuildManager, "doc": "the BuildManager used for managing this build"},
            returns='the value of the attribute')
    def get_attr_value(self, **kwargs):
        ''' Get the value of the attribute corresponding to this spec from the given container '''
        # 'manager' is still part of the docval signature and is forwarded to the parent
        # implementation through kwargs; it is not needed for the custom mapping below.
        spec, container = getargs('spec', 'container', kwargs)
        # handle custom mapping of field 'ext_attr' within container
        # BarDataHolder/BarData -> spec BarDataHolder/BarData.ext_attr
        if isinstance(container.parent, BarDataHolder) and spec.name == 'ext_attr':
            return container.ext_attr
        return super().get_attr_value(**kwargs)


class BuildDatasetShapeMixin(TestCase, metaclass=ABCMeta):
    """
    Base test case that builds a BuildManager for the BarData/BarDataHolder test types.

    Subclasses supply the shape/dims of the BarData spec and the dataset spec with which
    BarDataHolder includes BarData by implementing the two abstract methods.
    """

    def setUp(self):
        """Create the specs, register them in a test namespace, and set self.store and self.manager."""
        # Path of the zarr store used by the tests (relative to the current working directory).
        self.store = "tests/unit/test_io.zarr"
        self.set_up_specs()
        spec_catalog = SpecCatalog()
        spec_catalog.register_spec(self.bar_data_spec, 'test.yaml')
        spec_catalog.register_spec(self.bar_data_holder_spec, 'test.yaml')
        namespace = SpecNamespace(
            doc='a test namespace',
            name=CORE_NAMESPACE,
            schema=[{'source': 'test.yaml'}],
            version='0.1.0',
            catalog=spec_catalog
        )
        namespace_catalog = NamespaceCatalog()
        namespace_catalog.add_namespace(CORE_NAMESPACE, namespace)
        type_map = TypeMap(namespace_catalog)
        type_map.register_container_type(CORE_NAMESPACE, 'BarData', BarData)
        type_map.register_container_type(CORE_NAMESPACE, 'BarDataHolder', BarDataHolder)
        type_map.register_map(BarData, ExtBarDataMapper)
        type_map.register_map(BarDataHolder, ObjectMapper)
        self.manager = BuildManager(type_map)

    def set_up_specs(self):
        """Create self.bar_data_spec and self.bar_data_holder_spec from the subclass hooks."""
        shape, dims = self.get_base_shape_dims()
        self.bar_data_spec = DatasetSpec(
            doc='A test dataset specification with a data type',
            data_type_def='BarData',
            dtype='int',
            shape=shape,
            dims=dims,
        )
        self.bar_data_holder_spec = GroupSpec(
            doc='A container of multiple extended BarData objects',
            data_type_def='BarDataHolder',
            datasets=[self.get_dataset_inc_spec()],
        )

    @abstractmethod
    def get_base_shape_dims(self):
        """Return a (shape, dims) pair that is passed to the BarData DatasetSpec."""

    @abstractmethod
    def get_dataset_inc_spec(self):
        """Return the dataset spec that is placed in the BarDataHolder GroupSpec."""