Skip to content

Commit

Permalink
Merge pull request #1668 from braingram/memmap_open
Browse files Browse the repository at this point in the history
Allow memmapped arrays to keep the backing mmap file open
  • Loading branch information
braingram authored Dec 18, 2023
2 parents a6ec390 + 0c0aef4 commit 8078076
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 15 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ The ASDF Standard is at v1.6.0
- Deprecate automatic calling of ``AsdfFile.find_references`` during
``AsdfFile.__init__`` and ``asdf.open`` [#1708]

- Allow views of memmapped arrays to keep the backing mmap
open to avoid segfaults [#1668]

3.0.1 (2023-10-30)
------------------

Expand Down
4 changes: 2 additions & 2 deletions asdf/_asdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -1070,9 +1070,9 @@ def update(
def rewrite():
self._fd.seek(0)
self._serial_write(self._fd, pad_blocks, include_block_index)
self._fd.truncate()
if self._fd.can_memmap():
self._fd.close_memmap()
self._fd.truncate()

# if we have no read blocks, we can just call write_to as no internal blocks are reused
if len(self._blocks.blocks) == 0:
Expand Down Expand Up @@ -1107,9 +1107,9 @@ def rewrite():

# close memmap to trigger arrays to reload themselves
self._fd.seek(end_of_file)
self._fd.truncate()
if self._fd.can_memmap():
self._fd.close_memmap()
self._fd.truncate()

finally:
self._post_write(fd)
Expand Down
2 changes: 2 additions & 0 deletions asdf/_core/_converters/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,8 @@ def from_yaml_tree(self, node, tag, ctx):
elif isinstance(source, str):
# external
def data_callback(_attr=None, _ref=weakref.ref(ctx._blocks)):
if _attr not in (None, "cached_data", "data"):
raise AttributeError(f"_attr {_attr} is not supported")
blks = _ref()
if blks is None:
msg = "Failed to resolve reference to AsdfFile to read external block"
Expand Down
21 changes: 21 additions & 0 deletions asdf/_tests/_regtests/test_1334.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import numpy as np

import asdf


def test_memmap_view_access_after_close(tmp_path):
"""
Accessing a view of a memmap after the asdf file
is closed results in a segfault
https://github.com/asdf-format/asdf/issues/1334
"""

a = np.ones(10, dtype="uint8")
fn = tmp_path / "test.asdf"
asdf.AsdfFile({"a": a}).write_to(fn)

with asdf.open(fn, copy_arrays=False) as af:
v = af["a"][:5]

assert np.all(v == 1)
3 changes: 1 addition & 2 deletions asdf/_tests/_regtests/test_1523.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ def test_update_corrupts_stream_data(tmp_path):
with asdf.open(fn, mode="rw") as af:
af["a"] = np.arange(1000)
af.update()
# print(af['s']) # segmentation fault
np.testing.assert_array_equal(af["s"], [[1, 2, 3]])

with asdf.open(fn) as af:
# fails as af['s'] == [[116, 101, 111]]
np.testing.assert_array_equal(af["s"], [[1, 2, 3]])
8 changes: 4 additions & 4 deletions asdf/_tests/_regtests/test_1530.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import numpy as np
import pytest

import asdf


@pytest.mark.xfail(reason="fixing this may require subclassing ndarray")
def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
"""
Calling update with memmapped data can create invalid data in memmap views
Expand All @@ -31,5 +29,7 @@ def test_update_with_memmapped_data_can_make_view_data_invalid(tmp_path):
af.update()
np.testing.assert_array_equal(a, af["a"])
np.testing.assert_array_equal(b, af["b"])
assert False
# np.testing.assert_array_equal(va, ov) # segfault
# the view of 'a' taken above ('va') keeps the original memmap open
# and is not a valid view of af['a'] (as this now differs from the
# af['a'] used to generate the view).
assert not np.all(va == ov)
19 changes: 14 additions & 5 deletions asdf/generic_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -738,9 +738,6 @@ def fast_forward(self, size):
self.seek(size, SEEK_CUR)

def truncate(self, size=None):
# windows supports truncating as long as the file not opened
# more than once. So this must be called after closing all
# memmaps
if size is None:
self._fd.truncate()
else:
Expand Down Expand Up @@ -786,8 +783,11 @@ def memmap_array(self, offset, size):
return np.ndarray.__new__(np.memmap, shape=size, offset=offset, dtype="uint8", buffer=self._mmap)

def close_memmap(self):
if hasattr(self, "_mmap") and not self._mmap.closed:
self._mmap.close()
if hasattr(self, "_mmap"):
# we no longer close the _mmap here. This does mean that views of arrays
# that are backed by _mmap will keep the _mmap alive (and open). This is
# the cost of avoiding segfaults as np.memmap does not check if mmap is
# closed.
del self._mmap

def flush_memmap(self):
Expand Down Expand Up @@ -834,6 +834,15 @@ def close(self):
if self._close:
self._fix_permissions()

def truncate(self, size=None):
# windows supports truncating as long as the file not opened
# more than once. So this must be called after closing all
# memmaps
if sys.platform.startswith("win") and hasattr(self, "_mmap"):
self._mmap.close()
self.close_memmap()
super().truncate(size=size)


class MemoryIO(RandomAccessFile):
"""
Expand Down
13 changes: 11 additions & 2 deletions asdf/tags/core/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,8 +259,17 @@ def _make_array(self):
# memory mapping.
if self._array is not None:
base = util.get_array_base(self._array)
if isinstance(base, np.memmap) and isinstance(base.base, mmap.mmap) and base.base.closed:
self._array = None
if isinstance(base, np.memmap) and isinstance(base.base, mmap.mmap):
# check if the underlying mmap matches the one generated by generic_io
try:
fd = self._data_callback(_attr="_fd")()
except AttributeError:
# external blocks do not have a '_fd' and don't need to be updated
fd = None
if fd is not None:
if getattr(fd, "_mmap", None) is not base.base:
self._array = None
del fd

if self._array is None:
if isinstance(self._source, str):
Expand Down

0 comments on commit 8078076

Please sign in to comment.