Skip to content

Commit

Permalink
Backport PR #1783: IO: treat arrays with an empty shape like scalars when writing (#1796)
Browse files Browse the repository at this point in the history

Co-authored-by: ilia-kats <[email protected]>
  • Loading branch information
meeseeksmachine and ilia-kats authored Dec 9, 2024
1 parent 8bf09f8 commit 52fb6b6
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 7 deletions.
1 change: 1 addition & 0 deletions docs/release-notes/1783.bugfix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
`write_elem` now filters out incompatible `dataset_kwargs` when saving zero-dimensional arrays {user}`ilia-kats`
16 changes: 13 additions & 3 deletions src/anndata/_io/specs/methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from anndata._core.index import _normalize_indices
from anndata._core.merge import intersect_keys
from anndata._core.sparse_dataset import _CSCDataset, _CSRDataset, sparse_dataset
from anndata._io.utils import H5PY_V3, check_key
from anndata._io.utils import H5PY_V3, check_key, zero_dim_array_as_scalar
from anndata._warnings import OldFormatWarning
from anndata.compat import (
AwkArray,
Expand Down Expand Up @@ -382,6 +382,7 @@ def write_list(
@_REGISTRY.register_write(ZarrGroup, h5py.Dataset, IOSpec("array", "0.2.0"))
@_REGISTRY.register_write(ZarrGroup, np.ma.MaskedArray, IOSpec("array", "0.2.0"))
@_REGISTRY.register_write(ZarrGroup, ZarrArray, IOSpec("array", "0.2.0"))
@zero_dim_array_as_scalar
def write_basic(
f: GroupStorageType,
k: str,
Expand Down Expand Up @@ -477,6 +478,7 @@ def read_string_array_partial(d, items=None, indices=slice(None)):
)
@_REGISTRY.register_write(H5Group, (np.ndarray, "U"), IOSpec("string-array", "0.2.0"))
@_REGISTRY.register_write(H5Group, (np.ndarray, "O"), IOSpec("string-array", "0.2.0"))
@zero_dim_array_as_scalar
def write_vlen_string_array(
f: H5Group,
k: str,
Expand All @@ -498,6 +500,7 @@ def write_vlen_string_array(
)
@_REGISTRY.register_write(ZarrGroup, (np.ndarray, "U"), IOSpec("string-array", "0.2.0"))
@_REGISTRY.register_write(ZarrGroup, (np.ndarray, "O"), IOSpec("string-array", "0.2.0"))
@zero_dim_array_as_scalar
def write_vlen_string_array_zarr(
f: ZarrGroup,
k: str,
Expand Down Expand Up @@ -1134,8 +1137,15 @@ def write_hdf5_scalar(
):
# Can’t compress scalars, error is thrown
dataset_kwargs = dict(dataset_kwargs)
dataset_kwargs.pop("compression", None)
dataset_kwargs.pop("compression_opts", None)
for arg in (
"compression",
"compression_opts",
"chunks",
"shuffle",
"fletcher32",
"scaleoffset",
):
dataset_kwargs.pop(arg, None)
f.create_dataset(key, data=np.array(value), **dataset_kwargs)


Expand Down
31 changes: 27 additions & 4 deletions src/anndata/_io/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from functools import wraps
from functools import WRAPPER_ASSIGNMENTS, wraps
from itertools import pairwise
from typing import TYPE_CHECKING, cast
from warnings import warn
Expand All @@ -12,11 +12,12 @@
from ..compat import add_note

if TYPE_CHECKING:
from collections.abc import Callable
from typing import Literal
from collections.abc import Callable, Mapping
from typing import Any, Literal

from .._types import StorageType
from .._types import ContravariantRWAble, StorageType, _WriteInternal
from ..compat import H5Group, ZarrGroup
from .specs.registry import Writer

Storage = StorageType | BaseCompressedSparseDataset

Expand Down Expand Up @@ -285,3 +286,25 @@ def _read_legacy_raw(
if "varm" in attrs and "raw.varm" in f:
raw["varm"] = read_attr(f["raw.varm"])
return raw


def zero_dim_array_as_scalar(func: _WriteInternal):
    """\
    Wrap an array ``write_elem`` implementation so that zero-dimensional
    arrays are unwrapped and re-dispatched as scalars.
    """

    # Also copy __defaults__/__kwdefaults__ so the wrapper is
    # indistinguishable from the wrapped writer for registry purposes.
    @wraps(func, assigned=WRAPPER_ASSIGNMENTS + ("__defaults__", "__kwdefaults__"))
    def func_wrapper(
        f: StorageType,
        k: str,
        elem: ContravariantRWAble,
        *,
        _writer: Writer,
        dataset_kwargs: Mapping[str, Any],
    ):
        # Anything with a real shape goes through the wrapped writer untouched.
        if elem.shape != ():
            func(f, k, elem, _writer=_writer, dataset_kwargs=dataset_kwargs)
            return
        # ``elem[()]`` extracts the contained scalar; dispatching through the
        # writer lets the registered scalar spec handle it.
        _writer.write_elem(f, k, elem[()], dataset_kwargs=dataset_kwargs)

    return func_wrapper
21 changes: 21 additions & 0 deletions tests/test_io_elementwise.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,27 @@ def test_io_spec(store, value, encoding_type):
assert get_spec(store[key]) == _REGISTRY.get_spec(value)


@pytest.mark.parametrize(
    ("value", "encoding_type"),
    [
        pytest.param(np.asarray(1), "numeric-scalar", id="scalar_int"),
        pytest.param(np.asarray(1.0), "numeric-scalar", id="scalar_float"),
        pytest.param(np.asarray(True), "numeric-scalar", id="scalar_bool"),
        pytest.param(np.asarray("test"), "string", id="scalar_string"),
    ],
)
def test_io_spec_compressed_scalars(store: G, value: np.ndarray, encoding_type: str):
    """Zero-dim arrays round-trip even when compression kwargs are passed."""
    key = f"key_for_{encoding_type}"
    compression_kwargs = {"compression": "gzip", "compression_opts": 5}
    # Must not raise: kwargs incompatible with scalars are filtered on write.
    write_elem(store, key, value, dataset_kwargs=compression_kwargs)

    actual_encoding = _read_attr(store[key].attrs, "encoding-type")
    assert actual_encoding == encoding_type

    assert_equal(value, read_elem(store[key]))


# Can't instantiate cupy types at the top level, so converting them within the test
@pytest.mark.gpu
@pytest.mark.parametrize(
Expand Down

0 comments on commit 52fb6b6

Please sign in to comment.