Skip to content

Commit

Permalink
Merge branch 'dev' into dependabot/github_actions/codecov/codecov-act…
Browse files Browse the repository at this point in the history
…ion-5
  • Loading branch information
rly authored Dec 10, 2024
2 parents 108ed24 + f50dc84 commit 582706d
Show file tree
Hide file tree
Showing 14 changed files with 58 additions and 75 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ repos:
# hooks:
# - id: black
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.3
rev: v0.8.0
hooks:
- id: ruff
# - repo: https://github.com/econchick/interrogate
Expand Down
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
## HDMF 4.0.0 (Upcoming)

### Deprecations
- The following classes have been deprecated and removed: Array, AbstractSortedArray, SortedArray, LinSpace, Query, RegionSlicer, ListSlicer, H5RegionSlicer, DataRegion. The following methods have been deprecated and removed: fmt_docval_args, call_docval_func, get_container_cls, add_child, set_dataio (now refactored as set_data_io). We have also removed all early evelopment for region references. @mavaylon1 [#1998](https://github.com/hdmf-dev/hdmf/pull/1198)
- The following classes have been deprecated and removed: Array, AbstractSortedArray, SortedArray, LinSpace, Query, RegionSlicer, ListSlicer, H5RegionSlicer, DataRegion, RegionBuilder. The following methods have been deprecated and removed: fmt_docval_args, call_docval_func, get_container_cls, add_child, set_dataio (now refactored as set_data_io). We have also removed all early development for region references. @mavaylon1, @rly [#1998](https://github.com/hdmf-dev/hdmf/pull/1198), [#1212](https://github.com/hdmf-dev/hdmf/pull/1212)
- Python 3.8 has been deprecated. Python 3.9 is the new minimum with support for Python 3.13. @mavaylon1 [#1209](https://github.com/hdmf-dev/hdmf/pull/1209)

### Enhancements
Expand Down
1 change: 0 additions & 1 deletion docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@

nitpicky = True
nitpick_ignore = [('py:class', 'Intracomm'),
('py:class', 'h5py.RegionReference'),
('py:class', 'h5py._hl.dataset.Dataset'),
('py:class', 'function'),
('py:class', 'unittest.case.TestCase'),
Expand Down
2 changes: 1 addition & 1 deletion docs/source/overview_software_architecture.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ Builder
* :py:class:`~hdmf.build.builders.GroupBuilder` - represents a collection of objects
* :py:class:`~hdmf.build.builders.DatasetBuilder` - represents data
* :py:class:`~hdmf.build.builders.LinkBuilder` - represents soft-links
* :py:class:`~hdmf.build.builders.RegionBuilder` - represents a slice into data (Subclass of :py:class:`~hdmf.build.builders.DatasetBuilder`)
* :py:class:`~hdmf.build.builders.ReferenceBuilder` - represents a reference to another group or dataset

* **Main Module:** :py:class:`hdmf.build.builders`

Expand Down
15 changes: 8 additions & 7 deletions src/hdmf/backends/hdf5/h5tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,8 +908,6 @@ def get_type(cls, data):
"utf-8": H5_TEXT,
"ascii": H5_BINARY,
"bytes": H5_BINARY,
"ref": H5_REF,
"reference": H5_REF,
"object": H5_REF,
"isodatetime": H5_TEXT,
"datetime": H5_TEXT,
Expand Down Expand Up @@ -1492,7 +1490,7 @@ def __is_ref(self, dtype):
if isinstance(dtype, dict): # may be dict from reading a compound dataset
return self.__is_ref(dtype['dtype'])
if isinstance(dtype, str):
return dtype == DatasetBuilder.OBJECT_REF_TYPE or dtype == DatasetBuilder.REGION_REF_TYPE
return dtype == DatasetBuilder.OBJECT_REF_TYPE
return False

def __queue_ref(self, func):
Expand Down Expand Up @@ -1541,7 +1539,7 @@ def generate_dataset_html(dataset):

array_info_dict = get_basic_array_info(dataset)
if isinstance(dataset, h5py.Dataset):

dataset_type = "HDF5 dataset"
# get info from hdf5 dataset
compressed_size = dataset.id.get_storage_size()
if hasattr(dataset, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
Expand All @@ -1556,10 +1554,13 @@ def generate_dataset_html(dataset):
"Compression opts": dataset.compression_opts,
"Compression ratio": compression_ratio,
}

array_info_dict.update(hdf5_info_dict)

# generate html repr
repr_html = generate_array_html_repr(array_info_dict, dataset, "HDF5 dataset")
elif isinstance(dataset, np.ndarray):
dataset_type = "NumPy array"
else:
dataset_type = dataset.__class__.__name__

repr_html = generate_array_html_repr(array_info_dict, dataset, dataset_type)

return repr_html
2 changes: 1 addition & 1 deletion src/hdmf/build/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder
from .builders import Builder, DatasetBuilder, GroupBuilder, LinkBuilder, ReferenceBuilder
from .classgenerator import CustomClassGenerator, MCIClassGenerator
from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
ConstructError)
Expand Down
21 changes: 1 addition & 20 deletions src/hdmf/build/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from datetime import datetime, date

import numpy as np
from h5py import RegionReference

from ..utils import docval, getargs, get_docval

Expand Down Expand Up @@ -320,11 +319,10 @@ def values(self):

class DatasetBuilder(BaseBuilder):
OBJECT_REF_TYPE = 'object'
REGION_REF_TYPE = 'region'

@docval({'name': 'name', 'type': str, 'doc': 'The name of the dataset.'},
{'name': 'data',
'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', 'RegionBuilder', Iterable, datetime, date),
'type': ('array_data', 'scalar_data', 'data', 'DatasetBuilder', Iterable, datetime, date),
'doc': 'The data in this dataset.', 'default': None},
{'name': 'dtype', 'type': (type, np.dtype, str, list),
'doc': 'The datatype of this dataset.', 'default': None},
Expand Down Expand Up @@ -429,20 +427,3 @@ def __init__(self, **kwargs):
def builder(self):
"""The target builder object."""
return self['builder']


class RegionBuilder(ReferenceBuilder):

@docval({'name': 'region', 'type': (slice, tuple, list, RegionReference),
'doc': 'The region, i.e. slice or indices, into the target dataset.'},
{'name': 'builder', 'type': DatasetBuilder, 'doc': 'The dataset this region reference applies to.'})
def __init__(self, **kwargs):
"""Create a builder object for a region reference."""
region, builder = getargs('region', 'builder', kwargs)
super().__init__(builder)
self['region'] = region

@property
def region(self):
"""The selected region of the target dataset."""
return self['region']
4 changes: 1 addition & 3 deletions src/hdmf/build/objectmapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import numpy as np

from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, RegionBuilder, BaseBuilder
from .builders import DatasetBuilder, GroupBuilder, LinkBuilder, Builder, ReferenceBuilder, BaseBuilder
from .errors import (BuildError, OrphanContainerBuildError, ReferenceTargetNotBuiltError, ContainerConfigurationError,
ConstructError)
from .manager import Proxy, BuildManager
Expand Down Expand Up @@ -1214,8 +1214,6 @@ def __get_subspec_values(self, builder, spec, manager):
continue
if isinstance(attr_val, (GroupBuilder, DatasetBuilder)):
ret[attr_spec] = manager.construct(attr_val)
elif isinstance(attr_val, RegionBuilder): # pragma: no cover
raise ValueError("RegionReferences as attributes is not yet supported")
elif isinstance(attr_val, ReferenceBuilder):
ret[attr_spec] = manager.construct(attr_val.builder)
else:
Expand Down
32 changes: 25 additions & 7 deletions src/hdmf/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -707,8 +707,11 @@ def _generate_field_html(self, key, value, level, access_code):
return f'<div style="margin-left: {level * 20}px;" class="container-fields"><span class="field-key"' \
f' title="{access_code}">{key}: </span><span class="field-value">{value}</span></div>'

is_array_data = isinstance(value, (np.ndarray, h5py.Dataset, DataIO)) or \
(hasattr(value, "store") and hasattr(value, "shape")) # Duck typing for zarr array
# Detects array-like objects that conform to the Array Interface specification
# (e.g., NumPy arrays, HDF5 datasets, DataIO objects). Objects must have both
# 'shape' and 'dtype' attributes. Iterators are excluded as they lack 'shape'.
# This approach keeps the implementation generic without coupling to specific backends methods
is_array_data = hasattr(value, "shape") and hasattr(value, "dtype")

if is_array_data:
html_content = self._generate_array_html(value, level + 1)
Expand All @@ -735,14 +738,29 @@ def _generate_field_html(self, key, value, level, access_code):


def _generate_array_html(self, array, level):
"""Generates HTML for array data"""
"""Generates HTML for array data (e.g., NumPy arrays, HDF5 datasets, Zarr datasets and DataIO objects)."""

read_io = self.get_read_io() # if the Container was read from file, get IO object
if read_io is not None: # Note that sometimes numpy array have a read_io attribute
repr_html = read_io.generate_dataset_html(array)
else:
is_numpy_array = isinstance(array, np.ndarray)
read_io = self.get_read_io()
it_was_read_with_io = read_io is not None
is_data_io = isinstance(array, DataIO)

if is_numpy_array:
array_info_dict = get_basic_array_info(array)
repr_html = generate_array_html_repr(array_info_dict, array, "NumPy array")
elif is_data_io:
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, "DataIO")
elif it_was_read_with_io:
# The backend handles the representation here. Two special cases worth noting:
# 1. Array-type attributes (e.g., start_frame in ImageSeries) remain NumPy arrays
# even when their parent container has an IO
# 2. Data may have been modified after being read from storage
repr_html = read_io.generate_dataset_html(array)
else: # Not sure which object could get here
object_class = array.__class__.__name__
array_info_dict = get_basic_array_info(array.data)
repr_html = generate_array_html_repr(array_info_dict, array.data, object_class)

return f'<div style="margin-left: {level * 20}px;" class="container-fields">{repr_html}</div>'

Expand Down
10 changes: 3 additions & 7 deletions src/hdmf/spec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ class DtypeHelper:
'uint32': ["uint32", "uint"],
'uint64': ["uint64"],
'object': ['object'],
'region': ['region'],
'numeric': ['numeric'],
'isodatetime': ["isodatetime", "datetime", "date"]
}
Expand Down Expand Up @@ -174,12 +173,13 @@ def path(self):

_ref_args = [
{'name': _target_type_key, 'type': str, 'doc': 'the target type GroupSpec or DatasetSpec'},
{'name': 'reftype', 'type': str, 'doc': 'the type of references this is i.e. region or object'},
{'name': 'reftype', 'type': str,
'doc': 'the type of reference this is. only "object" is supported currently.'},
]


class RefSpec(ConstructableDict):
__allowable_types = ('object', 'region')
__allowable_types = ('object', )

@docval(*_ref_args)
def __init__(self, **kwargs):
Expand All @@ -200,10 +200,6 @@ def reftype(self):
'''The type of reference'''
return self['reftype']

@docval(rtype=bool, returns='True if this RefSpec specifies a region reference, False otherwise')
def is_region(self):
return self['reftype'] == 'region'


_attr_args = [
{'name': 'name', 'type': str, 'doc': 'The name of this attribute'},
Expand Down
20 changes: 14 additions & 6 deletions src/hdmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ def convert_bytes_to_str(bytes_size):

return basic_array_info_dict

def generate_array_html_repr(backend_info_dict, array, dataset_type=None):
def generate_array_html_repr(array_info_dict, array, dataset_type=None):
def html_table(item_dicts) -> str:
"""
Generates an html table from a dictionary
Expand All @@ -912,14 +912,22 @@ def html_table(item_dicts) -> str:
report += "</table>"
return report

array_info_html = html_table(backend_info_dict)
array_info_html = html_table(array_info_dict)
repr_html = dataset_type + "<br>" + array_info_html if dataset_type is not None else array_info_html

if hasattr(array, "nbytes"): # TODO: Remove this after h5py minimal version is larger than 3.0
array_size = array.nbytes
# Array like might lack nbytes (h5py < 3.0) or size (DataIO object)
if hasattr(array, "nbytes"):
array_size_bytes = array.nbytes
else:
array_size = array.size * array.dtype.itemsize
array_is_small = array_size < 1024 * 0.1 # 10 % a kilobyte to display the array
if hasattr(array, "size"):
array_size = array.size
else:
import math
array_size = math.prod(array.shape)
array_size_bytes = array_size * array.dtype.itemsize

# Heuristic for displaying data
array_is_small = array_size_bytes < 1024 * 0.1 # 10 % a kilobyte to display the array
if array_is_small:
repr_html += "<br>" + str(np.asarray(array))

Expand Down
5 changes: 1 addition & 4 deletions src/hdmf/validate/validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

from .errors import Error, DtypeError, MissingError, MissingDataType, ShapeError, IllegalLinkError, IncorrectDataType
from .errors import ExpectedArrayError, IncorrectQuantityError
from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder
from ..build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder
from ..build.builders import BaseBuilder
from ..spec import Spec, AttributeSpec, GroupSpec, DatasetSpec, RefSpec, LinkSpec
from ..spec import SpecNamespace
Expand Down Expand Up @@ -124,9 +124,6 @@ def get_type(data, builder_dtype=None):
# Bytes data
elif isinstance(data, bytes):
return 'ascii', get_string_format(data)
# RegionBuilder data
elif isinstance(data, RegionBuilder):
return 'region', None
# ReferenceBuilder data
elif isinstance(data, ReferenceBuilder):
return 'object', None
Expand Down
11 changes: 1 addition & 10 deletions tests/unit/build_tests/test_builder.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder, RegionBuilder
from hdmf.build import GroupBuilder, DatasetBuilder, LinkBuilder, ReferenceBuilder
from hdmf.testing import TestCase


Expand Down Expand Up @@ -392,12 +392,3 @@ def test_constructor(self):
db = DatasetBuilder('db1', [1, 2, 3])
rb = ReferenceBuilder(db)
self.assertIs(rb.builder, db)


class TestRegionBuilder(TestCase):

def test_constructor(self):
db = DatasetBuilder('db1', [1, 2, 3])
rb = RegionBuilder(slice(1, 3), db)
self.assertEqual(rb.region, slice(1, 3))
self.assertIs(rb.builder, db)
6 changes: 0 additions & 6 deletions tests/unit/spec_tests/test_ref_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,3 @@ def test_constructor(self):
def test_wrong_reference_type(self):
with self.assertRaises(ValueError):
RefSpec('TimeSeries', 'unknownreftype')

def test_isregion(self):
spec = RefSpec('TimeSeries', 'object')
self.assertFalse(spec.is_region())
spec = RefSpec('Data', 'region')
self.assertTrue(spec.is_region())

0 comments on commit 582706d

Please sign in to comment.