-
Notifications
You must be signed in to change notification settings - Fork 58
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1537 from braingram/immutable_block_manager
Move ndarray conversion to a Converter
- Loading branch information
Showing
87 changed files
with
5,400 additions
and
3,063 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -59,3 +59,6 @@ asdf/_version.py | |
|
||
# airspeed velocity files | ||
.asv | ||
|
||
# hypothesis files | ||
.hypothesis |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
""" | ||
Submodule for reading and writing ASDF blocks. | ||
The primary interface to this submodule is ``_block.manager.Manager`` | ||
that in some ways mimics the older ``BlockManager``. An instance | ||
of ``Manager`` will be created by each `asdf.AsdfFile` instance. | ||
Internally, this submodule is broken up into: | ||
- low-level: | ||
- ``io``: functions for reading and writing blocks | ||
- ``key``: ``Key`` used to implement ``Store`` (see below) | ||
- ``store``: ``Store`` special key-value store for indexing blocks | ||
- medium-level: | ||
- ``reader``: ``ReadBlock`` and ``read_blocks`` | ||
- ``writer``: ``WriteBlock`` and ``write_blocks`` | ||
- ``callback``: ``DataCallback`` for reading block data | ||
- ``external``: ``ExternalBlockCache`` for reading external blocks | ||
- ``options``: ``Options`` controlling block storage | ||
- high-level: | ||
- ``manager``: ``Manager`` and associated classes | ||
The low-level ``io`` functions are responsible for reading and writing | ||
bytes compatible with the block format defined in the ASDF standard. | ||
These should be compatible with as wide a variety of file formats as possible | ||
including files that are: | ||
- seekable and non-seekable | ||
- memory mappable | ||
- accessed from a remote server | ||
- stored in memory | ||
- etc | ||
To help organize ASDF block data the ``key`` and ``store`` submodules | ||
provide a special key-value store, ``Store``. ``Store`` uses ``Key`` | ||
instances to tie the lifetime of values to the lifetime of objects | ||
in the ASDF tree (without keeping references to the objects) and | ||
allows non-hashable objects to be used as keys. See the ``key`` | ||
submodule docstring for more details. One usage of ``Store`` is | ||
for managing ASDF block ``Options``. ``Options`` determine where | ||
and how array data will be written and a single ``Options`` instance | ||
might be associated with several arrays within the ASDF tree | ||
(if the arrays share the same base array). By using a ``Key`` generated | ||
with the base array the block ``Options`` can be stored in a ``Store`` | ||
without keeping a reference to the base array and these ``Options`` | ||
will be made unavailable if the base array is garbage collected (so | ||
they are not inappropriately assigned to a new array). | ||
The medium-level submodules ``reader`` and ``writer`` each define | ||
a helper class and function for reading or writing blocks: | ||
- ``ReadBlock`` and ``WriteBlock`` | ||
- ``read_blocks`` and ``write_blocks`` | ||
These abstract some of the complexity of reading and writing blocks | ||
using the low-level API and are the primary means by which the ``Manager`` | ||
reads and writes ASDF blocks. Reading of external blocks by the ``Manager`` | ||
requires some special handling which is contained in the ``external`` | ||
submodule. | ||
To allow for lazy-loading of ASDF block data, ``callback`` defines | ||
``DataCallback`` which allows reading block data even after the blocks | ||
have been rearranged following an update-in-place. | ||
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
""" | ||
A `DataCallback` class is implemented here to allow | ||
for reassignment of the index of an ASDF block corresponding | ||
to a callback. | ||
This is needed so that extension code can generate a callback | ||
during deserialization of an ASDF file that will continue | ||
to be valid even after an `AsdfFile.update` which might | ||
reorder blocks. | ||
To allow for the 'low-level' block access needed for ndarray, | ||
`DataCallback` can be called with an optional ``_attr`` | ||
argument to access cached data, the block header and other | ||
block attributes that we generally do not want to expose to | ||
extension code. | ||
""" | ||
import weakref | ||
|
||
|
||
class DataCallback:
    """
    A callable object used to read data from an ASDF block
    read from an ASDF file.

    The callback stores the index of a block together with a weak
    reference to the collection of read blocks, so it does not keep
    that collection alive. Both can be replaced later with
    ``_reassign`` so that an existing callback keeps working after
    the blocks have been reordered.

    Parameters
    ----------
    index : int
        Index of the block within ``read_blocks``.
    read_blocks : object
        Indexable collection of read blocks. It must support weak
        references (a plain ``list`` does not).
    """

    def __init__(self, index, read_blocks):
        self._reassign(index, read_blocks)

    def __call__(self, _attr=None):
        """
        Return the data of the referenced block.

        Parameters
        ----------
        _attr : str, optional
            When provided, return the block attribute with this name
            instead of ``data``.

        Raises
        ------
        OSError
            If the collection of read blocks no longer exists.
        """
        read_blocks = self._read_blocks_ref()
        if read_blocks is None:
            msg = "Attempt to read block data from missing block"
            raise OSError(msg)
        block = read_blocks[self._index]
        if _attr is None:
            return block.data
        # _attr allows NDArrayType to have low level block access for things
        # like reading the header and cached_data
        return getattr(block, _attr)

    def _reassign(self, index, read_blocks):
        """Point this callback at ``read_blocks[index]`` (held weakly)."""
        self._index = index
        self._read_blocks_ref = weakref.ref(read_blocks)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
class BlockIndexError(Exception):
    """
    An error occurred while reading or parsing an ASDF block index
    """
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
""" | ||
For external blocks, the previous block management | ||
would cache data opened from external files (to return the | ||
same underlying ndarray if the same external block | ||
was referenced more than once). `ExternalBlockCache` is | ||
used here to allow for the same behavior without requiring | ||
the block manager to have a reference to the `AsdfFile` | ||
(that references the block manager). | ||
""" | ||
import os | ||
import urllib | ||
|
||
import numpy as np | ||
|
||
from asdf import generic_io, util | ||
|
||
|
||
class UseInternalType:
    """
    Type of the ``UseInternal`` sentinel.

    ``ExternalBlockCache.load`` returns ``UseInternal`` instead of array
    data when the resolved uri is empty or equal to the base uri.
    """


# Sentinel instance returned by ``ExternalBlockCache.load``.
UseInternal = UseInternalType()
|
||
|
||
class ExternalBlockCache:
    """
    Cache of data read from the first block of external ASDF files.

    Data is cached under a key computed with ``util.get_base_uri(uri)``
    so repeated loads of the same external block return the same object.
    """

    def __init__(self):
        self.clear()

    def load(self, base_uri, uri, memmap=False, validate_checksums=False):
        """
        Return the data of the first block of the external file at ``uri``.

        Parameters
        ----------
        base_uri : str
            Uri that ``uri`` is resolved against.
        uri : str
            Uri of the external file.
        memmap : bool, optional
            If True, and the block is uncompressed and the resolved uri
            uses the ``file`` scheme, return a read-only ``numpy.memmap``
            of the block data instead of the block's cached data.
        validate_checksums : bool, optional
            Passed through to ``asdf.open``.

        Returns
        -------
        The cached data for the block, or ``UseInternal`` when the
        resolved uri is empty or equal to ``base_uri``.
        """
        # NOTE(review): the key comes from the unresolved ``uri``; it looks
        # like the same relative uri under different base uris would share
        # an entry -- confirm against callers.
        key = util.get_base_uri(uri)
        if key not in self._cache:
            resolved_uri = generic_io.resolve_uri(base_uri, uri)
            if resolved_uri == "" or resolved_uri == base_uri:
                return UseInternal

            # The submodule is imported explicitly: a bare ``import urllib``
            # does not guarantee that ``urllib.request`` has been loaded.
            from urllib.request import url2pathname

            from asdf import open as asdf_open

            with asdf_open(
                resolved_uri, "r", lazy_load=False, copy_arrays=True, validate_checksums=validate_checksums
            ) as af:
                blk = af._blocks.blocks[0]
                # an all-zero compression field is treated as "uncompressed"
                if memmap and blk.header["compression"] == b"\0\0\0\0":
                    parsed_url = util.patched_urllib_parse.urlparse(resolved_uri)
                    if parsed_url.scheme == "file":
                        # deal with leading slash for windows file://
                        filename = url2pathname(parsed_url.path)
                        arr = np.memmap(filename, np.uint8, "r", blk.data_offset, blk.cached_data.nbytes)
                    else:
                        arr = blk.cached_data
                else:
                    arr = blk.cached_data
            self._cache[key] = arr
        return self._cache[key]

    def clear(self):
        """Drop every cached entry."""
        self._cache = {}
|
||
|
||
def relative_uri_for_index(uri, index):
    """
    Build the file name used for the external block ``index`` of ``uri``.

    The final path component of ``uri`` has its extension replaced by the
    zero-padded (4 digit) index followed by ``.asdf``. For example a uri
    ending in ``foo.asdf`` with ``index=3`` gives ``foo0003.asdf``.

    Parameters
    ----------
    uri : str
        Uri of the main file.
    index : int
        Index of the external block.

    Returns
    -------
    str
        File name only, without any directory part.
    """
    # only the path component of the uri is needed to derive the file name
    path = util.patched_urllib_parse.urlparse(uri).path
    stem = os.path.splitext(os.path.basename(path))[0]
    return stem + f"{index:04d}.asdf"
Oops, something went wrong.