Skip to content

Commit

Permalink
Merge pull request #1537 from braingram/immutable_block_manager
Browse files Browse the repository at this point in the history
Move ndarray conversion to a Converter
  • Loading branch information
braingram authored Sep 8, 2023
2 parents 53b87b4 + 8672815 commit 1c6e5c1
Show file tree
Hide file tree
Showing 87 changed files with 5,400 additions and 3,063 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,6 @@ asdf/_version.py

# airspeed velocity files
.asv

# hypothesis files
.hypothesis
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ The ASDF Standard is at v1.6.0
AsdfFile.write_to and AsdfFile.update kwargs [#1592]
- Fix ``AsdfFile.info`` loading all array data [#1572]
- Blank out AsdfFile.tree on close [#1575]
- Move ndarray to a converter, add ``convert_unknown_ndarray_subclasses``
to ``asdf.config.AsdfConfig``, move ``asdf.Stream`` to
``asdf.tags.core.Stream``, update block storage support for
Converter and update internal block API [#1537]

2.15.1 (2023-08-07)
-------------------
Expand Down
3 changes: 1 addition & 2 deletions asdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,5 @@
from .asdf import open_asdf as open
from .config import config_context, get_config
from .exceptions import ValidationError
from .stream import Stream
from .tags.core import IntegerType
from .tags.core import IntegerType, Stream
from .tags.core.external_reference import ExternalArrayReference
61 changes: 61 additions & 0 deletions asdf/_block/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
"""
Submodule for reading and writing ASDF blocks.
The primary interface to this submodule is ``_block.manager.Manager``
that in some ways mimics the older ``BlockManager``. An instance
of ``Manager`` will be created by each `asdf.AsdfFile` instance.
Internally, this submodule is broken up into:
- low-level:
- ``io``: functions for reading and writing blocks
- ``key``: ``Key`` used to implement ``Store`` (see below)
- ``store``: ``Store`` special key-value store for indexing blocks
- medium-level:
- ``reader``: ``ReadBlock`` and ``read_blocks``
- ``writer``: ``WriteBlock`` and ``write_blocks``
- ``callback``: ``DataCallback`` for reading block data
- ``external``: ``ExternalBlockCache`` for reading external blocks
- ``options``: ``Options`` controlling block storage
- high-level:
- ``manager``: ``Manager`` and associated classes
The low-level ``io`` functions are responsible for reading and writing
bytes compatible with the block format defined in the ASDF standard.
These should be compatible with as wide a variety of file formats as possible
including files that are:
- seekable and non-seekable
- memory mappable
- accessed from a remote server
- stored in memory
- etc
To help organize ASDF block data the ``key`` and ``store`` submodules
provide a special key-value store, ``Store``. ``Store`` uses ``Key``
instances to tie the lifetime of values to the lifetime of objects
in the ASDF tree (without keeping references to the objects) and
allows non-hashable objects to be used as keys. See the ``key``
submodule docstring for more details. One usage of ``Store`` is
for managing ASDF block ``Options``. ``Options`` determine where
and how array data will be written and a single ``Options`` instance
might be associated with several arrays within the ASDF tree
(if the arrays share the same base array). By using a ``Key`` generated
with the base array the block ``Options`` can be stored in a ``Store``
without keeping a reference to the base array and these ``Options``
will be made unavailable if the base array is garbage collected (so
they are not inappropriately assigned to a new array).
The medium-level submodules ``reader`` and ``writer`` each define
a helper class and function for reading or writing blocks:
- ``ReadBlock`` and ``WriteBlock``
- ``read_blocks`` and ``write_blocks``
These abstract some of the complexity of reading and writing blocks
using the low-level API and are the primary means by which the ``Manager``
reads and writes ASDF blocks. Reading of external blocks by the ``Manager``
requires some special handling which is contained in the ``external``
submodule.
To allow for lazy-loading of ASDF block data, ``callback`` defines
``DataCallback`` which allows reading block data even after the blocks
have been rearranged following an update-in-place.
"""
43 changes: 43 additions & 0 deletions asdf/_block/callback.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
"""
A `DataCallback` class is implemented here to allow
for reassignment of the index of an ASDF block corresponding
to a callback.
This is needed so that extension code can generate a callback
during deserialization of an ASDF file that will continue
to be valid even after an `AsdfFile.update` which might
reorder blocks.
To allow for the 'low-level' block access needed for ndarray,
`DataCallback` can be called with an optional ``_attr``
argument to read other attributes of the block (such as the
cached data or the block header) that we generally do not
want to expose to extension code.
"""
import weakref


class DataCallback:
    """
    Callable that returns the data of one block out of a collection
    of blocks read from an ASDF file.

    The collection is held only through a weak reference and the block
    is looked up by index on every call, so the callback can be pointed
    at a different index/collection with ``_reassign``.
    """

    def __init__(self, index, read_blocks):
        self._reassign(index, read_blocks)

    def __call__(self, _attr=None):
        """
        Return ``data`` of the referenced block, or the attribute named
        by ``_attr`` when one is given.

        Raises
        ------
        OSError
            If the weakly referenced block collection no longer exists.
        """
        blocks = self._read_blocks_ref()
        if blocks is None:
            msg = "Attempt to read block data from missing block"
            raise OSError(msg)

        block = blocks[self._index]
        # _attr gives NDArrayType low level access to the block for things
        # like reading the header and cached_data
        return block.data if _attr is None else getattr(block, _attr)

    def _reassign(self, index, read_blocks):
        """
        Point this callback at ``read_blocks[index]``.

        Only a weak reference to ``read_blocks`` is stored.
        """
        self._index = index
        self._read_blocks_ref = weakref.ref(read_blocks)
4 changes: 4 additions & 0 deletions asdf/_block/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
class BlockIndexError(Exception):
    """Raised when an ASDF block index cannot be read or parsed."""
64 changes: 64 additions & 0 deletions asdf/_block/external.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
"""
For external blocks, the previous block management
would cache data opened from external files (to return the
same underlying ndarray if the same external block
was referenced more than once). `ExternalBlockCache` is
used here to allow for the same behavior without requiring
the block manager to have a reference to the `AsdfFile`
(that references the block manager).
"""
import os
import urllib

import numpy as np

from asdf import generic_io, util


class UseInternalType:
    """
    Type of the ``UseInternal`` sentinel.

    ``ExternalBlockCache.load`` returns ``UseInternal`` instead of an
    array when the requested URI resolves to an empty string or to the
    base URI itself.
    """


# module-level sentinel instance; compare against this object
UseInternal = UseInternalType()


class ExternalBlockCache:
    """
    Cache of block data loaded from external ASDF files.

    Loaded arrays are stored under ``util.get_base_uri(uri)`` so that
    repeated requests for the same ``uri`` return the same array object.
    """

    def __init__(self):
        self.clear()

    def load(self, base_uri, uri, memmap=False, validate_checksums=False):
        """
        Return the data of the first block of the external file at ``uri``.

        Parameters
        ----------
        base_uri : str
            URI that ``uri`` is resolved against.
        uri : str
            URI of the external file; its base URI is the cache key.
        memmap : bool, optional
            If True, and the block's compression header is four NUL bytes
            and the resolved URI has the ``file`` scheme, return a
            read-only ``numpy.memmap`` over the block data instead of the
            block's cached data.
        validate_checksums : bool, optional
            Passed through to ``asdf.open``.

        Returns
        -------
        The cached array, or ``UseInternal`` when the resolved URI is
        empty or equal to ``base_uri`` (nothing is cached in that case).
        """
        key = util.get_base_uri(uri)
        if key in self._cache:
            return self._cache[key]

        resolved_uri = generic_io.resolve_uri(base_uri, uri)
        if resolved_uri == "" or resolved_uri == base_uri:
            return UseInternal

        # imported at call time rather than module level
        # NOTE(review): presumably to avoid a circular import - confirm
        from asdf import open as asdf_open

        with asdf_open(
            resolved_uri, "r", lazy_load=False, copy_arrays=True, validate_checksums=validate_checksums
        ) as af:
            blk = af._blocks.blocks[0]
            # default to the in-memory data; only replaced below when a
            # memory map was requested and is possible
            arr = blk.cached_data
            if memmap and blk.header["compression"] == b"\0\0\0\0":
                parsed_url = util.patched_urllib_parse.urlparse(resolved_uri)
                if parsed_url.scheme == "file":
                    # ``import urllib`` alone does not load the
                    # ``urllib.request`` submodule, so import the function
                    # explicitly instead of relying on another module
                    # having imported it first
                    from urllib.request import url2pathname

                    # deal with leading slash for windows file://
                    filename = url2pathname(parsed_url.path)
                    arr = np.memmap(filename, np.uint8, "r", blk.data_offset, arr.nbytes)

        self._cache[key] = arr
        return arr

    def clear(self):
        """Drop every cached array."""
        self._cache = {}


def relative_uri_for_index(uri, index):
    """
    Build the file name used for the external block number ``index``.

    The last path component of ``uri`` has its extension removed, then
    ``index`` (zero-padded to at least 4 digits) and ``.asdf`` are
    appended. Only the file name is returned, without any directory part.
    """
    uri_path = util.patched_urllib_parse.urlparse(uri).path
    # basename() is the second element of os.path.split()
    stem, _extension = os.path.splitext(os.path.basename(uri_path))
    return f"{stem}{index:04d}.asdf"
Loading

0 comments on commit 1c6e5c1

Please sign in to comment.