Skip to content

Commit

Permalink
Merge pull request #1797 from zacharyburnett/deprecate/copy_arrays
Browse files Browse the repository at this point in the history
deprecate `copy_arrays` with warning
  • Loading branch information
zacharyburnett authored Aug 12, 2024
2 parents ac7b6de + cd7d420 commit 867c280
Show file tree
Hide file tree
Showing 13 changed files with 114 additions and 90 deletions.
1 change: 1 addition & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
to allow lazy deserialization of ASDF tagged tree nodes to
custom objects. [#1733]

- Deprecate ``copy_arrays`` in favor of ``memmap`` [#1797]

3.2.0 (2024-04-05)
------------------
Expand Down
11 changes: 9 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ The `open` function also works as a context handler:
with asdf.open("example.asdf") as af:
...
.. warning::
The ``copy_arrays`` argument of `asdf.open()` and `AsdfFile` is deprecated,
and will be removed in ASDF 4.0. It is replaced by ``memmap``, which
is the opposite of ``copy_arrays`` (``memmap == not copy_arrays``).
In ASDF 4.0, ``memmap`` will default to ``False``, which means arrays
will no longer be memory-mapped by default.

To get a quick overview of the data stored in the file, use the top-level
`AsdfFile.info()` method:

Expand Down Expand Up @@ -245,12 +252,12 @@ Array data remains unloaded until it is explicitly accessed:
True
By default, uncompressed data blocks are memory mapped for efficient
access. Memory mapping can be disabled by using the ``copy_arrays``
access. Memory mapping can be disabled by using the ``memmap``
option of `open` when reading:

.. code:: python
af = asdf.open("example.asdf", copy_arrays=True)
af = asdf.open("example.asdf", memmap=False)
.. _end-read-file-text:

Expand Down
33 changes: 18 additions & 15 deletions asdf/_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def __init__(
ignore_version_mismatch=True,
ignore_unrecognized_tag=False,
ignore_implicit_conversion=NotSet,
copy_arrays=False,
copy_arrays=NotSet,
memmap=NotSet,
lazy_load=True,
custom_schema=None,
Expand Down Expand Up @@ -117,16 +117,15 @@ def __init__(
as-is.
copy_arrays : bool, optional
Deprecated; use ``memmap`` instead.
When `False`, when reading files, attempt to memmap underlying data
arrays when possible.
memmap : bool, optional
When `True`, when reading files, attempt to memmap underlying data
arrays when possible. When set, this argument will override
``copy_arrays``. When not set, the ``copy_arrays`` will determine
if arrays are memory mapped or copied. ``copy_arrays`` will be
deprecated and the default will change in an upcoming asdf version
which by default will not memory map arrays.
``copy_arrays``. The default will change to ``False`` in an upcoming
ASDF version. At the moment the default is ``True``.
lazy_load : bool, optional
When `True` and the underlying file handle is seekable, data
Expand All @@ -135,8 +134,6 @@ def __init__(
open during the lifetime of the tree. Setting to False causes
all data arrays to be loaded up front, which means that they
can be accessed even after the underlying file is closed.
Note: even if ``lazy_load`` is `False`, ``copy_arrays`` is still taken
into account.
custom_schema : str, optional
Path to a custom schema file that will be used for a secondary
Expand Down Expand Up @@ -182,9 +179,16 @@ def __init__(
self._closed = False
self._external_asdf_by_uri = {}
# if memmap is set, it overrides copy_arrays
if memmap is not NotSet:
copy_arrays = not memmap
self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=not copy_arrays)
if copy_arrays is not NotSet:
warnings.warn(
"copy_arrays is deprecated; use memmap instead. Note that memmap will default to False in asdf 4.0.",
AsdfWarning,
)
if memmap is NotSet:
memmap = not copy_arrays
elif memmap is NotSet:
memmap = True
self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=memmap)
# this message is passed into find_references to only warn if
# a reference was found
find_ref_warning_msg = (
Expand Down Expand Up @@ -1618,7 +1622,7 @@ def open_asdf(
ignore_version_mismatch=True,
ignore_unrecognized_tag=False,
_force_raw_types=False,
copy_arrays=False,
copy_arrays=NotSet,
memmap=NotSet,
lazy_tree=NotSet,
lazy_load=True,
Expand Down Expand Up @@ -1661,16 +1665,15 @@ def open_asdf(
`False` by default.
copy_arrays : bool, optional
Deprecated; use ``memmap`` instead.
When `False`, when reading files, attempt to memmap underlying data
arrays when possible.
memmap : bool, optional
When `True`, when reading files, attempt to memmap underlying data
arrays when possible. When set, this argument will override
``copy_arrays``. When not set, the ``copy_arrays`` will determine
if arrays are memory mapped or copied. ``copy_arrays`` will be
deprecated and the default will change in an upcoming asdf version
which by default will not memory map arrays.
``copy_arrays``. The default will change to ``False`` in an upcoming
ASDF version. At the moment the default is ``True``.
lazy_load : bool, optional
When `True` and the underlying file handle is seekable, data
Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1334.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def test_memmap_view_access_after_close(tmp_path):
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"a": a}).write_to(fn)

with asdf.open(fn, copy_arrays=False) as af:
with asdf.open(fn, memmap=True) as af:
v = af["a"][:5]

assert np.all(v == 1)
8 changes: 4 additions & 4 deletions asdf/_tests/_regtests/test_1525.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
import asdf


@pytest.mark.parametrize("copy_arrays", [True, False])
def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays):
@pytest.mark.parametrize("memmap", [True, False])
def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, memmap):
"""
External blocks are always lazy loaded and memmapped
Expand All @@ -18,13 +18,13 @@ def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays)
af.set_array_storage(arr, "external")
af.write_to(fn)

with asdf.open(fn, copy_arrays=copy_arrays) as af:
with asdf.open(fn, memmap=memmap) as af:
# check that block is external
source = af["arr"]._source
assert isinstance(source, str)

# check if block is memmapped
if copy_arrays:
if not memmap:
assert not isinstance(af["arr"].base, np.memmap)
else:
assert isinstance(af["arr"].base, np.memmap)
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1530.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
af = asdf.AsdfFile({"a": a, "b": b})
af.write_to(fn)

with asdf.open(fn, mode="rw", copy_arrays=False) as af:
with asdf.open(fn, mode="rw", memmap=True) as af:
va = af["a"][:3]
np.testing.assert_array_equal(a, af["a"])
np.testing.assert_array_equal(b, af["b"])
Expand Down
2 changes: 1 addition & 1 deletion asdf/_tests/_regtests/test_1558.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_asdffile_tree_cleared_on_close(tmp_path):
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"a": np.arange(1000), "b": np.arange(42)}).write_to(fn)

with asdf.open(fn, copy_arrays=True, lazy_load=False) as af:
with asdf.open(fn, memmap=False, lazy_load=False) as af:
array_weakref = weakref.ref(af["a"])
array_ref = af["b"]

Expand Down
10 changes: 5 additions & 5 deletions asdf/_tests/tags/core/tests/test_ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -980,16 +980,16 @@ def test_memmap_write(tmp_path):
# Make sure we're actually writing to an internal array for this test
af.write_to(tmpfile, all_array_storage="internal")

with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="rw", memmap=True) as af:
data = af["data"]
assert data.flags.writeable is True
data[0] = 42
assert data[0] == 42

with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="rw", memmap=True) as af:
assert af["data"][0] == 42

with asdf.open(tmpfile, mode="r", copy_arrays=False) as af:
with asdf.open(tmpfile, mode="r", memmap=True) as af:
assert af["data"][0] == 42


Expand All @@ -1008,7 +1008,7 @@ def test_readonly(tmp_path):
af["data"][0] = 41

# Forcing memmap, the array should still be readonly
with asdf.open(tmpfile, copy_arrays=False) as af:
with asdf.open(tmpfile, memmap=True) as af:
assert af["data"].flags.writeable is False
with pytest.raises(ValueError, match=r"assignment destination is read-only"):
af["data"][0] = 41
Expand All @@ -1019,7 +1019,7 @@ def test_readonly(tmp_path):
af["data"][0] = 40

# Copying the arrays makes it safe to write to the underlying array
with asdf.open(tmpfile, mode="r", copy_arrays=True) as af:
with asdf.open(tmpfile, mode="r", memmap=False) as af:
assert af["data"].flags.writeable is True
af["data"][0] = 42

Expand Down
4 changes: 2 additions & 2 deletions asdf/_tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def test_update_exceptions(tmp_path):
ff.write_to(path)

with (
asdf.open(path, mode="r", copy_arrays=True) as ff,
asdf.open(path, mode="r", memmap=False) as ff,
pytest.raises(
IOError,
match=r"Can not update, since associated file is read-only.*",
Expand Down Expand Up @@ -500,7 +500,7 @@ def test_array_access_after_file_close(tmp_path):

# With memory mapping disabled and copying arrays enabled,
# the array data should still persist in memory after close:
with asdf.open(path, lazy_load=False, copy_arrays=True) as af:
with asdf.open(path, lazy_load=False, memmap=False) as af:
tree = af.tree
assert_array_equal(tree["data"], data)

Expand Down
Loading

0 comments on commit 867c280

Please sign in to comment.