Skip to content

Commit

Permalink
allow memmapped arrays to keep mmap open
Browse files Browse the repository at this point in the history
asdf uses a mmap to memory map files to produce np.memmap
arrays (when copy_arrays is True, the current default).
np.memmap does not check if the backing mmap is closed before
accessing array data and asdf closes the mmap when the asdf
file is closed. This results in segfaults (see issue 1334).

This commit changes when happens to the mmap when the asdf
file is closed. asdf will no longer force closure of the mmap.
This will result in an open file pointer if any array still
references the mmap (the mmap will likely be closed by the
garbage collector when all arrays are collected which matches
the behavior of np.memmap).
  • Loading branch information
braingram committed Dec 7, 2023
1 parent 0885637 commit f14f741
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 10 deletions.
2 changes: 2 additions & 0 deletions asdf/_core/_converters/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ def from_yaml_tree(self, node, tag, ctx):
elif isinstance(source, str):
# external
def data_callback(_attr=None, _ref=weakref.ref(ctx._blocks)):
if _attr not in (None, "cached_data", "data"):
raise AttributeError(f"_attr {_attr} is not supported")
blks = _ref()
if blks is None:
msg = "Failed to resolve reference to AsdfFile to read external block"
Expand Down
3 changes: 1 addition & 2 deletions asdf/_tests/_regtests/test_1523.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def test_update_corrupts_stream_data(tmp_path):
with asdf.open(fn, mode="rw") as af:
af["a"] = np.arange(1000)
af.update()
# print(af['s']) # segmentation fault
np.testing.assert_array_equal(af["s"], [[1, 2, 3]])

with asdf.open(fn) as af:
# fails as af['s'] == [[116, 101, 111]]
np.testing.assert_array_equal(af["s"], [[1, 2, 3]])
8 changes: 4 additions & 4 deletions asdf/_tests/_regtests/test_1530.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import numpy as np
import pytest

import asdf


@pytest.mark.xfail(reason="fixing this may require subclassing ndarray")
def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
"""
Calling update with memmapped data can create invalid data in memmap views
Expand All @@ -31,5 +29,7 @@ def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
af.update()
np.testing.assert_array_equal(a, af["a"])
np.testing.assert_array_equal(b, af["b"])
assert False
# np.testing.assert_array_equal(va, ov) # segfault
# the view of 'a' taken above ('va') keeps the original memmap open
# and is not a valid view of af['a'] (as this now differs from the
# af['a'] used to generate the view).
assert not np.all(va == ov)
7 changes: 5 additions & 2 deletions asdf/generic_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,11 @@ def memmap_array(self, offset, size):
return np.ndarray.__new__(np.memmap, shape=size, offset=offset, dtype="uint8", buffer=self._mmap)

def close_memmap(self):
if hasattr(self, "_mmap") and not self._mmap.closed:
self._mmap.close()
if hasattr(self, "_mmap"):
# we no longer close the _mmap here. This does mean that views of arrays
# that are backed by _mmap will keep the _mmap alive (and open). This is
# the cost of avoiding segfaults as np.memmap does not check if mmap is
# closed.
del self._mmap

def flush_memmap(self):
Expand Down
13 changes: 11 additions & 2 deletions asdf/tags/core/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,17 @@ def _make_array(self):
# memory mapping.
if self._array is not None:
base = util.get_array_base(self._array)
if isinstance(base, np.memmap) and isinstance(base.base, mmap.mmap) and base.base.closed:
self._array = None
if isinstance(base, np.memmap) and isinstance(base.base, mmap.mmap):
# check if the underlying mmap matches the one generated by generic_io
try:
fd = self._data_callback(_attr="_fd")()
except AttributeError:
# external blocks do not have a '_fd' and don't need to be updated
fd = None
if fd is not None:
if getattr(fd, "_mmap", None) is not base.base:
self._array = None
del fd

if self._array is None:
if isinstance(self._source, str):
Expand Down

0 comments on commit f14f741

Please sign in to comment.