diff --git a/CHANGELOG.md b/CHANGELOG.md index eae44edb2..b7beeed13 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,8 @@ ### Fixes - rename (fix typo) argument to `lcs_child_in_parent` in `CoordinateSystemManager.add_cs` \[{pull}`936`\]. - - replace usages of `pkg_resources` with `importlib.metadata` \[{pull}`941`\]. +- replace usages of `copy_arrays` with `memmap` for `asdf>=3.1.0` \[{pull}`940`\]. ### Dependencies diff --git a/weldx/asdf/file.py b/weldx/asdf/file.py index f75ec0583..decc873ac 100644 --- a/weldx/asdf/file.py +++ b/weldx/asdf/file.py @@ -3,6 +3,7 @@ from __future__ import annotations import copy +import importlib.metadata import io import pathlib import warnings @@ -42,12 +43,23 @@ __all__ = [ "WeldxFile", "DEFAULT_ARRAY_COMPRESSION", - "DEFAULT_ARRAY_COPYING", + "DEFAULT_MEMORY_MAPPING", "DEFAULT_ARRAY_INLINE_THRESHOLD", "_PROTECTED_KEYS", ] +def asdf_open_memory_mapping_kwarg(memmap: bool) -> dict: + """Return the asdf open keyword argument that selects memory mapping. + + asdf>=3.1.0 takes ``memmap``; older releases take the inverse ``copy_arrays``. + """ + # Compare as integers: as strings "10" sorts before "3" (asdf 10.x would fail). + major, minor, *_ = (*importlib.metadata.version("asdf").split("."), "0") + has_memmap = (int(major), int(minor)) >= (3, 1) + return {"memmap": memmap} if has_memmap else {"copy_arrays": not memmap} + + @contextmanager def reset_file_position(fh: SupportsFileReadWrite): """Reset the internal position of the given file after leaving the context. @@ -66,8 +78,8 @@ def reset_file_position(fh: SupportsFileReadWrite): DEFAULT_ARRAY_COMPRESSION = "input" """All arrays will be compressed using this algorithm, if not specified by user.""" -DEFAULT_ARRAY_COPYING = True -"""Stored Arrays will be copied to memory, or not. If False, use memory mapping.""" +DEFAULT_MEMORY_MAPPING = False +"""Stored Arrays will be memory-mapped, or not. If True, use memory mapping.""" DEFAULT_ARRAY_INLINE_THRESHOLD = 10 """Arrays with less or equal elements will be inlined (stored as string, not binary).""" @@ -148,8 +160,8 @@ class WeldxFile(_ProtectedViewDict): - ``lz4``: Use lz4 compression. - ``input``: Use the same compression as in the file read. If there is no prior file, acts as None. 
- copy_arrays : - When `False`, when reading files, attempt to memory map (memmap) underlying data + memmap : + When `True`, when reading files, attempt to memory map (memmap) underlying data arrays when possible. This avoids blowing the memory when working with very large datasets. array_inline_threshold : @@ -219,19 +231,19 @@ def __init__( ) = None, software_history_entry: Mapping = None, compression: str = DEFAULT_ARRAY_COMPRESSION, - copy_arrays: bool = DEFAULT_ARRAY_COPYING, + memmap: bool = DEFAULT_MEMORY_MAPPING, array_inline_threshold: int = DEFAULT_ARRAY_INLINE_THRESHOLD, ): if write_kwargs is None: write_kwargs = dict(all_array_compression=compression) if asdffile_kwargs is None: - asdffile_kwargs = dict(copy_arrays=copy_arrays) + asdffile_kwargs = asdf_open_memory_mapping_kwarg(memmap=memmap) # this parameter is now (asdf-2.8) a asdf.config parameter, so we store it here. self._array_inline_threshold = array_inline_threshold - # TODO: ensure no mismatching args for compression and copy_arrays. + # TODO: ensure no mismatching args for compression and memmap. self._write_kwargs = write_kwargs self._asdffile_kwargs = asdffile_kwargs diff --git a/weldx/asdf/util.py b/weldx/asdf/util.py index 5f30d8d9b..9da1d5412 100644 --- a/weldx/asdf/util.py +++ b/weldx/asdf/util.py @@ -2,6 +2,7 @@ from __future__ import annotations +import importlib.metadata from collections.abc import Callable, Hashable, Mapping, MutableMapping, Set from contextlib import contextmanager from io import BytesIO, TextIOBase @@ -149,7 +150,7 @@ def read_buffer_context( Buffer containing ASDF file contents open_kwargs Additional keywords to pass to `asdf.AsdfFile.open` - Extensions are always set, ``copy_arrays=True`` is set by default. + Extensions are always set, ``memmap=False`` is set by default. 
Returns ------- @@ -158,7 +159,13 @@ def read_buffer_context( """ if open_kwargs is None: - open_kwargs = {"copy_arrays": True, "lazy_load": False} + open_kwargs = {"memmap": False, "lazy_load": False} + + # asdf<3.1.0 only accepts ``copy_arrays``. Compare as integers: "10" < "3" as str. + major, minor, *_ = (*importlib.metadata.version("asdf").split("."), "0") + if "memmap" in open_kwargs and (int(major), int(minor)) < (3, 1): + open_kwargs = dict(open_kwargs) # translate on a copy, not the caller's dict + open_kwargs["copy_arrays"] = not open_kwargs.pop("memmap") buffer.seek(0) @@ -190,7 +197,7 @@ def read_buffer( Buffer containing ASDF file contents open_kwargs Additional keywords to pass to `asdf.AsdfFile.open` - Extensions are always set, ``copy_arrays=True`` is set by default. + Extensions are always set, ``memmap=False`` is set by default. Returns ------- @@ -220,7 +227,7 @@ def write_read_buffer_context( Extensions are always set. open_kwargs Additional keywords to pass to `asdf.AsdfFile.open` - Extensions are always set, ``copy_arrays=True`` is set by default. + Extensions are always set, ``memmap=False`` is set by default. Returns ------- @@ -248,7 +255,7 @@ def write_read_buffer( Extensions are always set. open_kwargs Additional keywords to pass to `asdf.AsdfFile.open` - Extensions are always set, ``copy_arrays=True`` is set by default. + Extensions are always set, ``memmap=False`` is set by default. 
Returns ------- diff --git a/weldx/tests/asdf_tests/test_asdf_core.py b/weldx/tests/asdf_tests/test_asdf_core.py index b2a07fd63..863b1204b 100644 --- a/weldx/tests/asdf_tests/test_asdf_core.py +++ b/weldx/tests/asdf_tests/test_asdf_core.py @@ -121,15 +121,15 @@ def get_xarray_example_data_array(): return dax -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("select", [{}, {"d4": "z"}]) -def test_xarray_data_array(copy_arrays, lazy_load, select): +def test_xarray_data_array(memmap, lazy_load, select): """Test ASDF read/write of xarray.DataArray.""" dax = get_xarray_example_data_array().sel(**select) tree = {"dax": dax} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: dax_file = data["dax"] assert dax.identical(dax_file) @@ -172,13 +172,13 @@ def get_xarray_example_dataset(): return dsx -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_xarray_dataset(copy_arrays, lazy_load): +def test_xarray_dataset(memmap, lazy_load): dsx = get_xarray_example_dataset() tree = {"dsx": dsx} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: dsx_file = data["dsx"] assert dsx.identical(dsx_file) @@ -228,25 +228,25 @@ def get_local_coordinate_system(time_dep_orientation: bool, time_dep_coordinates @pytest.mark.parametrize("time_dep_orientation", [False, True]) @pytest.mark.parametrize("time_dep_coordinates", [False, True]) -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) def 
test_local_coordinate_system( - time_dep_orientation, time_dep_coordinates, copy_arrays, lazy_load + time_dep_orientation, time_dep_coordinates, memmap, lazy_load ): """Test (de)serialization of LocalCoordinateSystem in ASDF.""" lcs = get_local_coordinate_system(time_dep_orientation, time_dep_coordinates) with write_read_buffer_context( - {"lcs": lcs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + {"lcs": lcs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: assert data["lcs"] == lcs -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("has_ref_time", [True, False]) @pytest.mark.parametrize("has_tdp_orientation", [True, False]) def test_local_coordinate_system_coords_timeseries( - copy_arrays, lazy_load, has_ref_time, has_tdp_orientation + memmap, lazy_load, has_ref_time, has_tdp_orientation ): """Test reading and writing a LCS with a `TimeSeries` as coordinates to asdf.""" # create inputs to lcs __init__ @@ -270,7 +270,7 @@ def test_local_coordinate_system_coords_timeseries( # round trip and compare with write_read_buffer_context( - {"lcs": lcs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + {"lcs": lcs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: lcs_buffer = data["lcs"] assert lcs_buffer == lcs @@ -337,13 +337,13 @@ def get_example_coordinate_system_manager(): return csm -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) -def test_coordinate_system_manager(copy_arrays, lazy_load): +def test_coordinate_system_manager(memmap, lazy_load): csm = get_example_coordinate_system_manager() tree = {"cs_hierarchy": csm} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": 
memmap, "lazy_load": lazy_load} ) as data: csm_file = data["cs_hierarchy"] assert csm == csm_file @@ -400,25 +400,23 @@ def get_coordinate_system_manager_with_subsystems(nested: bool): return csm_global -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("nested", [True, False]) -def test_coordinate_system_manager_with_subsystems(copy_arrays, lazy_load, nested): +def test_coordinate_system_manager_with_subsystems(memmap, lazy_load, nested): csm = get_coordinate_system_manager_with_subsystems(nested) tree = {"cs_hierarchy": csm} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: csm_file = data["cs_hierarchy"] assert csm == csm_file -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("csm_time_ref", [None, "2000-03-16"]) -def test_coordinate_system_manager_time_dependencies( - copy_arrays, lazy_load, csm_time_ref -): +def test_coordinate_system_manager_time_dependencies(memmap, lazy_load, csm_time_ref): """Test serialization of time components from CSM and its attached LCS.""" lcs_tdp_1_time_ref = None if csm_time_ref is None: @@ -449,15 +447,15 @@ def test_coordinate_system_manager_time_dependencies( tree = {"cs_hierarchy": csm_root} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: csm_file = data["cs_hierarchy"] assert csm_root == csm_file -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) -def 
test_coordinate_system_manager_with_data(copy_arrays, lazy_load): +def test_coordinate_system_manager_with_data(memmap, lazy_load): """Test if data attached to a CSM is stored and read correctly.""" csm = tf.CoordinateSystemManager("root", "csm") csm.create_cs("cs_1", "root", coordinates=Q_([1, 1, 1], "mm")) @@ -483,7 +481,7 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load): tree = {"csm": csm} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as buffer: csm_buffer = buffer["csm"] @@ -498,7 +496,7 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load): # -------------------------------------------------------------------------------------- -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( "ts", @@ -518,9 +516,9 @@ def test_coordinate_system_manager_with_data(copy_arrays, lazy_load): ), ], ) -def test_time_series(ts, copy_arrays, lazy_load): +def test_time_series(ts, memmap, lazy_load): with write_read_buffer_context( - {"ts": ts}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + {"ts": ts}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: ts_file = data["ts"] if isinstance(ts.data, ME): @@ -536,7 +534,7 @@ def test_time_series(ts, copy_arrays, lazy_load): # -------------------------------------------------------------------------------------- -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( "coords, interpolation", @@ -546,21 +544,21 @@ def test_time_series(ts, copy_arrays, lazy_load): (dict(time=Q_([1, 2, 3], "s"), space=Q_([4, 5, 6, 7], "m")), "step"), ], ) -def test_generic_series_discrete(coords, 
interpolation, copy_arrays, lazy_load): +def test_generic_series_discrete(coords, interpolation, memmap, lazy_load): shape = tuple(len(v) for v in coords.values()) data = Q_(np.ones(shape), "m") gs = GenericSeries(data, coords=coords, interpolation=interpolation) with write_read_buffer_context( - {"gs": gs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + {"gs": gs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: gs_file = data["gs"] assert gs == gs_file -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( "expr, params, units, dims", @@ -581,11 +579,11 @@ def test_generic_series_discrete(coords, interpolation, copy_arrays, lazy_load): ), ], ) -def test_generic_series_expression(expr, params, units, dims, copy_arrays, lazy_load): +def test_generic_series_expression(expr, params, units, dims, memmap, lazy_load): gs = GenericSeries(expr, parameters=params, units=units, dims=dims) with write_read_buffer_context( - {"gs": gs}, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + {"gs": gs}, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: gs_file = data["gs"] @@ -746,16 +744,16 @@ def test_hashing(algorithm: str, buffer_size: int): # test_asdf_serialization ---------------------------------------------------------- @staticmethod - @pytest.mark.parametrize("copy_arrays", [True, False]) + @pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("store_content", [True, False]) - def test_asdf_serialization(copy_arrays, lazy_load, store_content): + def test_asdf_serialization(memmap, lazy_load, store_content): """Test the asdf serialization of the `ExternalFile` class. 
Parameters ---------- - copy_arrays : bool - If `False`, arrays are accessed via memory mapping whenever possible while + memmap : bool + If `True`, arrays are accessed via memory mapping whenever possible while reading them. lazy_load : bool If `True`, items from the asdf file are not loaded until accessed. @@ -769,7 +767,7 @@ def test_asdf_serialization(copy_arrays, lazy_load, store_content): ) tree = {"file": ef} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: ef_file = data["file"] @@ -801,10 +799,10 @@ def test_asdf_serialization(copy_arrays, lazy_load, store_content): class TestPointCloud: @staticmethod - @pytest.mark.parametrize("copy_arrays", [True, False]) + @pytest.mark.parametrize("memmap", [True, False]) @pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize("reshape", [True, False]) - def test_asdf_serialization(copy_arrays, lazy_load, reshape): + def test_asdf_serialization(memmap, lazy_load, reshape): time = None coordinates = [ [0.0, 0.0, 0.0], @@ -822,7 +820,7 @@ def test_asdf_serialization(copy_arrays, lazy_load, reshape): pc = SpatialData(coordinates=coordinates, triangles=triangles, time=time) tree = {"point_cloud": pc} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: pc_file = data["point_cloud"] diff --git a/weldx/tests/asdf_tests/test_asdf_measurement.py b/weldx/tests/asdf_tests/test_asdf_measurement.py index abea5ae4b..bc8239fbf 100644 --- a/weldx/tests/asdf_tests/test_asdf_measurement.py +++ b/weldx/tests/asdf_tests/test_asdf_measurement.py @@ -80,17 +80,17 @@ def measurement_chain_with_equipment() -> MeasurementChain: return mc -@pytest.mark.parametrize("copy_arrays", [True, False]) +@pytest.mark.parametrize("memmap", [True, False]) 
@pytest.mark.parametrize("lazy_load", [True, False]) @pytest.mark.parametrize( "measurement_chain", [measurement_chain_without_equipment(), measurement_chain_with_equipment()], ) -def test_measurement_chain(copy_arrays, lazy_load, measurement_chain): +def test_measurement_chain(memmap, lazy_load, measurement_chain): """Test the asdf serialization of the measurement chain.""" tree = {"m_chain": measurement_chain} with write_read_buffer_context( - tree, open_kwargs={"copy_arrays": copy_arrays, "lazy_load": lazy_load} + tree, open_kwargs={"memmap": memmap, "lazy_load": lazy_load} ) as data: mc_file = data["m_chain"] assert measurement_chain == mc_file