Skip to content

Commit

Permalink
Merge pull request #1700 from braingram/load_yaml
Browse files Browse the repository at this point in the history
Introduce `asdf.util.load_yaml`
  • Loading branch information
braingram authored Jan 15, 2024
2 parents d9d9974 + bbfa9f2 commit c0eae06
Show file tree
Hide file tree
Showing 6 changed files with 98 additions and 18 deletions.
4 changes: 4 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ The ASDF Standard is at v1.6.0
is removed in an upcoming asdf release will be ``False`` and
asdf will no longer by-default memory map arrays. [#1667]

- Introduce ``asdf.util.load_yaml`` to load just the YAML contents
of an ASDF file (with the option ``tagged`` to load the contents
as a tree of ``asdf.tagged.Tagged`` instances to preserve tags) [#1700]

3.0.1 (2023-10-30)
------------------

Expand Down
20 changes: 10 additions & 10 deletions asdf/_tests/tags/core/tests/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,16 +46,16 @@ def test_integer_storage(tmpdir, inline):
with asdf.AsdfFile(tree) as af:
af.write_to(tmpfile)

with asdf.open(tmpfile, _force_raw_types=True) as rf:
if inline:
assert "source" not in rf.tree["integer"]["words"]
assert "data" in rf.tree["integer"]["words"]
else:
assert "source" in rf.tree["integer"]["words"]
assert "data" not in rf.tree["integer"]["words"]

assert "string" in rf.tree["integer"]
assert rf.tree["integer"]["string"] == str(value)
tree = asdf.util.load_yaml(tmpfile, tagged=True)
if inline:
assert "source" not in tree["integer"]["words"]
assert "data" in tree["integer"]["words"]
else:
assert "source" in tree["integer"]["words"]
assert "data" not in tree["integer"]["words"]

assert "string" in tree["integer"]
assert tree["integer"]["string"] == str(value)


def test_integer_conversion():
Expand Down
3 changes: 1 addition & 2 deletions asdf/_tests/test_reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,8 +221,7 @@ def test_internal_reference(tmp_path):
ff.write_to(buff)
buff.seek(0)
ff = asdf.AsdfFile()
content = asdf.AsdfFile()._open_impl(ff, buff, _get_yaml_content=True)
assert b"{$ref: ''}" in content
assert b"{$ref: ''}" in buff.getvalue()


def test_implicit_internal_reference(tmp_path):
Expand Down
27 changes: 27 additions & 0 deletions asdf/_tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import contextlib
import io
import warnings

import numpy as np
import pytest

import asdf
from asdf import generic_io, util
from asdf.exceptions import AsdfDeprecationWarning

Expand Down Expand Up @@ -118,3 +121,27 @@ def test_minversion():

assert util.minversion(yaml, "3.1")
assert util.minversion("yaml", "3.1")


@pytest.mark.parametrize("input_type", ["filename", "binary_file", "generic_file"])
@pytest.mark.parametrize("tagged", [True, False])
def test_load_yaml(tmp_path, input_type, tagged):
    """
    Check ``util.load_yaml`` for each supported kind of ``init`` argument
    (a path, an open binary file, a generic_io file object), with and
    without ``tagged``.
    """
    path = tmp_path / "test.asdf"
    asdf.AsdfFile({"a": np.zeros(3)}).write_to(path)

    # The stack closes whatever file object was opened for this case; for
    # the plain filename case nothing is registered and it is a no-op.
    with contextlib.ExitStack() as stack:
        if input_type == "filename":
            init = path
        elif input_type == "binary_file":
            init = open(path, "rb")
            stack.enter_context(init)
        elif input_type == "generic_file":
            init = generic_io.get_file(path, "r")
            stack.enter_context(init)

        tree = util.load_yaml(init, tagged=tagged)

    # The array node should be a TaggedDict only when tags were requested.
    is_tagged_dict = isinstance(tree["a"], asdf.tagged.TaggedDict)
    assert is_tagged_dict if tagged else not is_tagged_dict
16 changes: 10 additions & 6 deletions asdf/commands/edit.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import yaml

from asdf import constants, generic_io, schema, util
from asdf._asdf import AsdfFile, open_asdf
from asdf._asdf import AsdfFile
from asdf._block import io as bio
from asdf._block.exceptions import BlockIndexError

Expand Down Expand Up @@ -259,11 +259,15 @@ def edit(path):
continue

try:
# Blocks are not read during validation, so this will not raise
# an error even though we're only opening the YAML portion of
# the file.
with open_asdf(io.BytesIO(new_content), _force_raw_types=True):
pass
# check this is an ASDF file
if new_content[: len(constants.ASDF_MAGIC)] != constants.ASDF_MAGIC:
msg = "Does not appear to be a ASDF file."
raise ValueError(msg)
# read the tagged tree (which also checks if the YAML is valid)
tagged_tree = util.load_yaml(io.BytesIO(new_content), tagged=True)
# validate the tagged tree
ctx = AsdfFile(version=new_asdf_version)
schema.validate(tagged_tree, ctx=ctx, reading=True)
except yaml.YAMLError as e:
print("Error: failed to parse updated YAML:")
print_exception(e)
Expand Down
46 changes: 46 additions & 0 deletions asdf/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from urllib.request import pathname2url

import numpy as np
import yaml

# The standard library importlib.metadata returns duplicate entrypoints
# for all python versions up to and including 3.11
Expand Down Expand Up @@ -39,6 +40,7 @@


__all__ = [
"load_yaml",
"human_list",
"get_array_base",
"get_base_uri",
Expand All @@ -55,6 +57,50 @@
]


def load_yaml(init, tagged=False):
    """
    Read only the YAML section of an ASDF file and return it as a tree.

    Parameters
    ----------
    init : filename or file-like
        The ASDF file to read. A file-like object must be opened in
        binary mode.

    tagged : bool, optional
        When True, the YAML is parsed with ``AsdfLoader`` so the result
        contains `asdf.tagged.Tagged` instances, which is useful when the
        YAML tags themselves are of interest. When False (the default),
        the pyyaml base loader is used and the tree holds only basic
        python types (see the pyyaml ``BaseLoader`` documentation).

    Returns
    -------
    tree : dict
        Dictionary representing the ASDF tree
    """

    from .generic_io import get_file
    from .yamlutil import AsdfLoader

    # Pick the loader class; prefer the libyaml-backed base loader when
    # pyyaml reports that it was built with libyaml.
    if tagged:
        yaml_loader = AsdfLoader
    elif getattr(yaml, "__with_libyaml__", None):
        yaml_loader = yaml.CBaseLoader
    else:
        yaml_loader = yaml.BaseLoader

    with get_file(init, "r") as generic_file:
        # Only hand pyyaml the bytes up to and including the end-of-YAML
        # marker, so nothing after the marker is parsed.
        yaml_reader = generic_file.reader_until(
            constants.YAML_END_MARKER_REGEX,
            7,
            "End of YAML marker",
            include=True,
        )
        # yaml.load is acceptable here: every loader selected above is one
        # that does not construct custom python objects.
        return yaml.load(yaml_reader, Loader=yaml_loader)  # noqa: S506


def human_list(line, separator="and"):
"""
Formats a list for human readability.
Expand Down

0 comments on commit c0eae06

Please sign in to comment.