diff --git a/CHANGES.rst b/CHANGES.rst index f9120e2ff..2a441728c 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -55,6 +55,7 @@ to allow lazy deserialization of ASDF tagged tree nodes to custom objects. [#1733] +- Deprecate ``copy_arrays`` in favor of ``memmap`` [#1797] 3.2.0 (2024-04-05) ------------------ diff --git a/README.rst b/README.rst index 0309022b8..bba0fefbe 100644 --- a/README.rst +++ b/README.rst @@ -185,6 +185,13 @@ The `open` function also works as a context handler: with asdf.open("example.asdf") as af: ... +.. warning:: + The ``copy_arrays`` argument of `asdf.open()` and `AsdfFile` is deprecated, + and will be removed in ASDF 4.0. It is replaced by ``memmap``, which + is the opposite of ``copy_arrays`` (``memmap == not copy_arrays``). + In ASDF 4.0, ``memmap`` will default to ``False``, which means arrays + will no longer be memory-mapped by default. + To get a quick overview of the data stored in the file, use the top-level `AsdfFile.info()` method: @@ -245,12 +252,12 @@ Array data remains unloaded until it is explicitly accessed: True By default, uncompressed data blocks are memory mapped for efficient -access. Memory mapping can be disabled by using the ``copy_arrays`` +access. Memory mapping can be disabled by using the ``memmap`` option of `open` when reading: .. code:: python - af = asdf.open("example.asdf", copy_arrays=True) + af = asdf.open("example.asdf", memmap=False) .. _end-read-file-text: diff --git a/asdf/_asdf.py b/asdf/_asdf.py index 560615c5a..ba3ddab61 100644 --- a/asdf/_asdf.py +++ b/asdf/_asdf.py @@ -75,7 +75,7 @@ def __init__( ignore_version_mismatch=True, ignore_unrecognized_tag=False, ignore_implicit_conversion=NotSet, - copy_arrays=False, + copy_arrays=NotSet, memmap=NotSet, lazy_load=True, custom_schema=None, @@ -117,16 +117,15 @@ def __init__( as-is. copy_arrays : bool, optional + Deprecated; use ``memmap`` instead. When `False`, when reading files, attempt to memmap underlying data arrays when possible. memmap : bool, optional When `True`, when reading files, attempt to memmap underlying data arrays when possible. When set, this argument will override - ``copy_arrays``. When not set, the ``copy_arrays`` will determine - if arrays are memory mapped or copied. ``copy_arrays`` will be - deprecated and the default will change in an upcoming asdf version - which by default will not memory map arrays. + ``copy_arrays``. The default will change to ``False`` in an upcoming + ASDF version. At the moment the default is ``True``. lazy_load : bool, optional When `True` and the underlying file handle is seekable, data @@ -135,8 +134,6 @@ def __init__( open during the lifetime of the tree. Setting to False causes all data arrays to be loaded up front, which means that they can be accessed even after the underlying file is closed. - Note: even if ``lazy_load`` is `False`, ``copy_arrays`` is still taken - into account. custom_schema : str, optional Path to a custom schema file that will be used for a secondary @@ -182,9 +179,16 @@ def __init__( self._closed = False self._external_asdf_by_uri = {} # if memmap is set, it overrides copy_arrays - if memmap is not NotSet: - copy_arrays = not memmap - self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=not copy_arrays) + if copy_arrays is not NotSet: + warnings.warn( + "copy_arrays is deprecated; use memmap instead. Note that memmap will default to False in asdf 4.0.", + AsdfWarning, + ) + if memmap is NotSet: + memmap = not copy_arrays + elif memmap is NotSet: + memmap = True + self._blocks = BlockManager(uri=uri, lazy_load=lazy_load, memmap=memmap) # this message is passed into find_references to only warn if # a reference was found find_ref_warning_msg = ( @@ -1618,7 +1622,7 @@ def open_asdf( ignore_version_mismatch=True, ignore_unrecognized_tag=False, _force_raw_types=False, - copy_arrays=False, + copy_arrays=NotSet, memmap=NotSet, lazy_tree=NotSet, lazy_load=True, @@ -1661,16 +1665,15 @@ def open_asdf( `False` by default. copy_arrays : bool, optional + Deprecated; use ``memmap`` instead. When `False`, when reading files, attempt to memmap underlying data arrays when possible. memmap : bool, optional When `True`, when reading files, attempt to memmap underlying data arrays when possible. When set, this argument will override - ``copy_arrays``. When not set, the ``copy_arrays`` will determine - if arrays are memory mapped or copied. ``copy_arrays`` will be - deprecated and the default will change in an upcoming asdf version - which by default will not memory map arrays. + ``copy_arrays``. The default will change to ``False`` in an upcoming + ASDF version. At the moment the default is ``True``. lazy_load : bool, optional When `True` and the underlying file handle is seekable, data diff --git a/asdf/_tests/_regtests/test_1334.py b/asdf/_tests/_regtests/test_1334.py index 383268ec9..b08581033 100644 --- a/asdf/_tests/_regtests/test_1334.py +++ b/asdf/_tests/_regtests/test_1334.py @@ -15,7 +15,7 @@ def test_memmap_view_access_after_close(tmp_path): fn = tmp_path / "test.asdf" asdf.AsdfFile({"a": a}).write_to(fn) - with asdf.open(fn, copy_arrays=False) as af: + with asdf.open(fn, memmap=True) as af: v = af["a"][:5] assert np.all(v == 1) diff --git a/asdf/_tests/_regtests/test_1525.py b/asdf/_tests/_regtests/test_1525.py index a682fc66e..f9573e38d 100644 --- a/asdf/_tests/_regtests/test_1525.py +++ b/asdf/_tests/_regtests/test_1525.py @@ -4,8 +4,8 @@ import asdf -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, memmap): """ External blocks are always lazy loaded and memmapped @@ -18,13 +18,13 @@ def test_external_blocks_always_lazy_loaded_and_memmapped(tmp_path, copy_arrays) af.set_array_storage(arr, "external") af.write_to(fn) - with asdf.open(fn, copy_arrays=copy_arrays) as af: + with asdf.open(fn, memmap=memmap) as af: # check that block is external source = af["arr"]._source assert isinstance(source, str) # check if block is memmapped - if copy_arrays: + if not memmap: assert not isinstance(af["arr"].base, np.memmap) else: assert isinstance(af["arr"].base, np.memmap) diff --git a/asdf/_tests/_regtests/test_1530.py b/asdf/_tests/_regtests/test_1530.py index 8cf2f73ee..d99372cf9 100644 --- a/asdf/_tests/_regtests/test_1530.py +++ b/asdf/_tests/_regtests/test_1530.py @@ -20,7 +20,7 @@ def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path): af = asdf.AsdfFile({"a": a, "b": b}) af.write_to(fn) - with asdf.open(fn, mode="rw", copy_arrays=False) as af: + with asdf.open(fn, mode="rw", memmap=True) as af: va = af["a"][:3] np.testing.assert_array_equal(a, af["a"]) np.testing.assert_array_equal(b, af["b"]) diff --git a/asdf/_tests/_regtests/test_1558.py b/asdf/_tests/_regtests/test_1558.py index 508cd1ba0..fb87b4d51 100644 --- a/asdf/_tests/_regtests/test_1558.py +++ b/asdf/_tests/_regtests/test_1558.py @@ -16,7 +16,7 @@ def test_asdffile_tree_cleared_on_close(tmp_path): fn = tmp_path / "test.asdf" asdf.AsdfFile({"a": np.arange(1000), "b": np.arange(42)}).write_to(fn) - with asdf.open(fn, copy_arrays=True, lazy_load=False) as af: + with asdf.open(fn, memmap=False, lazy_load=False) as af: array_weakref = weakref.ref(af["a"]) array_ref = af["b"] diff --git a/asdf/_tests/tags/core/tests/test_ndarray.py b/asdf/_tests/tags/core/tests/test_ndarray.py index 2661cb3a5..08a2202fe 100644 --- a/asdf/_tests/tags/core/tests/test_ndarray.py +++ b/asdf/_tests/tags/core/tests/test_ndarray.py @@ -980,16 +980,16 @@ def test_memmap_write(tmp_path): # Make sure we're actually writing to an internal array for this test af.write_to(tmpfile, all_array_storage="internal") - with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af: + with asdf.open(tmpfile, mode="rw", memmap=True) as af: data = af["data"] assert data.flags.writeable is True data[0] = 42 assert data[0] == 42 - with asdf.open(tmpfile, mode="rw", copy_arrays=False) as af: + with asdf.open(tmpfile, mode="rw", memmap=True) as af: assert af["data"][0] == 42 - with asdf.open(tmpfile, mode="r", copy_arrays=False) as af: + with asdf.open(tmpfile, mode="r", memmap=True) as af: assert af["data"][0] == 42 @@ -1008,7 +1008,7 @@ def test_readonly(tmp_path): af["data"][0] = 41 # Forcing memmap, the array should still be readonly - with asdf.open(tmpfile, copy_arrays=False) as af: + with asdf.open(tmpfile, memmap=True) as af: assert af["data"].flags.writeable is False with pytest.raises(ValueError, match=r"assignment destination is read-only"): af["data"][0] = 41 @@ -1019,7 +1019,7 @@ def test_readonly(tmp_path): af["data"][0] = 40 # Copying the arrays makes it safe to write to the underlying array - with asdf.open(tmpfile, mode="r", copy_arrays=True) as af: + with asdf.open(tmpfile, mode="r", memmap=False) as af: assert af["data"].flags.writeable is True af["data"][0] = 42 diff --git a/asdf/_tests/test_api.py b/asdf/_tests/test_api.py index a6e476793..2b95734f3 100644 --- a/asdf/_tests/test_api.py +++ b/asdf/_tests/test_api.py @@ -136,7 +136,7 @@ def test_update_exceptions(tmp_path): ff.write_to(path) with ( - asdf.open(path, mode="r", copy_arrays=True) as ff, + asdf.open(path, mode="r", memmap=False) as ff, pytest.raises( IOError, match=r"Can not update, since associated file is read-only.*", @@ -500,7 +500,7 @@ def test_array_access_after_file_close(tmp_path): # With memory mapping disabled and copying arrays enabled, # the array data should still persist in memory after close: - with asdf.open(path, lazy_load=False, copy_arrays=True) as af: + with asdf.open(path, lazy_load=False, memmap=False) as af: tree = af.tree assert_array_equal(tree["data"], data) diff --git a/asdf/_tests/test_array_blocks.py b/asdf/_tests/test_array_blocks.py index 8a98bf229..32cb03491 100644 --- a/asdf/_tests/test_array_blocks.py +++ b/asdf/_tests/test_array_blocks.py @@ -109,8 +109,8 @@ def test_pad_blocks(tmp_path): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_expand_tree(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_expand_tree(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) testpath = os.path.join(tmp_path, "test.asdf") @@ -122,7 +122,7 @@ def test_update_expand_tree(tmp_path, lazy_load, copy_arrays): ff = asdf.AsdfFile(tree) ff.set_array_storage(tree["arrays"][2], "inline") ff.write_to(testpath, pad_blocks=True) - with asdf.open(testpath, lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(testpath, lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: assert len(list(ff._blocks.blocks)) == 2 assert_array_equal(ff.tree["arrays"][0], my_array) ff.tree["extra"] = [0] * 6000 @@ -137,7 +137,7 @@ def test_update_expand_tree(tmp_path, lazy_load, copy_arrays): ff = asdf.AsdfFile(tree) ff.set_array_storage(tree["arrays"][2], "inline") ff.write_to(os.path.join(tmp_path, "test2.asdf"), pad_blocks=True) - with asdf.open(os.path.join(tmp_path, "test2.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test2.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["extra"] = [0] * 2 ff.update() @@ -148,8 +148,8 @@ def test_update_expand_tree(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_all_external(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_all_external(tmp_path, lazy_load, memmap): fn = tmp_path / "test.asdf" my_array = np.arange(64) * 1 @@ -162,7 +162,7 @@ def test_update_all_external(tmp_path, lazy_load, copy_arrays): with asdf.config.config_context() as cfg: cfg.array_inline_threshold = 10 cfg.all_array_storage = "external" - with asdf.open(fn, lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as af: + with asdf.open(fn, lazy_load=lazy_load, memmap=memmap, mode="rw") as af: af.update() assert "test0000.asdf" in os.listdir(tmp_path) @@ -170,8 +170,8 @@ def test_update_all_external(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_some_external(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_some_external(tmp_path, lazy_load, memmap): fn = tmp_path / "test.asdf" my_array = np.arange(64) * 1 @@ -181,7 +181,7 @@ def test_update_some_external(tmp_path, lazy_load, copy_arrays): af = asdf.AsdfFile(tree) af.write_to(fn) - with asdf.open(fn, lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as af: + with asdf.open(fn, lazy_load=lazy_load, memmap=memmap, mode="rw") as af: af.set_array_storage(af["arrays"][0], "external") af.update() @@ -194,8 +194,8 @@ def _get_update_tree(): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_delete_first_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_delete_first_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -207,7 +207,7 @@ def test_update_delete_first_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: del ff.tree["arrays"][0] ff.update() @@ -219,8 +219,8 @@ def test_update_delete_first_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_delete_last_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_delete_last_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -232,7 +232,7 @@ def test_update_delete_last_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: del ff.tree["arrays"][-1] ff.update() @@ -244,8 +244,8 @@ def test_update_delete_last_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_delete_middle_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_delete_middle_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -257,7 +257,7 @@ def test_update_delete_middle_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: del ff.tree["arrays"][1] ff.update() assert len(ff._blocks.blocks) == 2 @@ -271,8 +271,8 @@ def test_update_delete_middle_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_replace_first_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_replace_first_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -284,7 +284,7 @@ def test_update_replace_first_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["arrays"][0] = np.arange(32) ff.update() @@ -297,8 +297,8 @@ def test_update_replace_first_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_replace_last_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_replace_last_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -310,7 +310,7 @@ def test_update_replace_last_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["arrays"][2] = np.arange(32) ff.update() @@ -323,8 +323,8 @@ def test_update_replace_last_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_replace_middle_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_replace_middle_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -336,7 +336,7 @@ def test_update_replace_middle_array(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["arrays"][1] = np.arange(32) ff.update() @@ -349,8 +349,8 @@ def test_update_replace_middle_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_add_array(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_add_array(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -360,7 +360,7 @@ def test_update_add_array(tmp_path, lazy_load, copy_arrays): ff = asdf.AsdfFile(tree) ff.write_to(path, pad_blocks=True) - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["arrays"].append(np.arange(32)) ff.update() @@ -372,8 +372,8 @@ def test_update_add_array(tmp_path, lazy_load, copy_arrays): @pytest.mark.parametrize("lazy_load", [True, False]) -@pytest.mark.parametrize("copy_arrays", [True, False]) -def test_update_add_array_at_end(tmp_path, lazy_load, copy_arrays): +@pytest.mark.parametrize("memmap", [True, False]) +def test_update_add_array_at_end(tmp_path, lazy_load, memmap): tmp_path = str(tmp_path) path = os.path.join(tmp_path, "test.asdf") @@ -385,7 +385,7 @@ def test_update_add_array_at_end(tmp_path, lazy_load, copy_arrays): original_size = os.stat(path).st_size - with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, copy_arrays=copy_arrays, mode="rw") as ff: + with asdf.open(os.path.join(tmp_path, "test.asdf"), lazy_load=lazy_load, memmap=memmap, mode="rw") as ff: ff.tree["arrays"].append(np.arange(65536, dtype="