From 1890063bb49772a9c5d38ff63d34064318b8e848 Mon Sep 17 00:00:00 2001 From: Ed Slavich Date: Mon, 10 Aug 2020 18:42:40 -0400 Subject: [PATCH] Update matching behavior to be more glob-like --- asdf/schema.py | 3 ++- asdf/tests/test_util.py | 7 +++++++ asdf/util.py | 19 +++++++++++++------ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/asdf/schema.py b/asdf/schema.py index 2fe3a51a8..ffe5432a8 100644 --- a/asdf/schema.py +++ b/asdf/schema.py @@ -79,7 +79,8 @@ def _type_to_tag(type_): def validate_tag(validator, tag_pattern, instance, schema): """ Implements the tag validation directive, which checks the - tag against a pattern which may include '*' wildcards. + tag against a pattern that may include wildcards. See + `asdf.util.uri_match` for details on the matching behavior. """ if hasattr(instance, '_tag'): instance_tag = instance._tag diff --git a/asdf/tests/test_util.py b/asdf/tests/test_util.py index 83164c94d..5616f2370 100644 --- a/asdf/tests/test_util.py +++ b/asdf/tests/test_util.py @@ -49,9 +49,16 @@ def test_patched_urllib_parse(): ("asdf://somewhere.org/tags/foo-1.0", "asdf://somewhere.org/tags/bar-1.0", False), ("asdf://somewhere.org/tags/foo-*", "asdf://somewhere.org/tags/foo-1.0", True), ("asdf://somewhere.org/tags/foo-*", "asdf://somewhere.org/tags/bar-1.0", False), + ("asdf://somewhere.org/tags/foo-*", "asdf://somewhere.org/tags/foo-extras/bar-1.0", False), ("asdf://*/tags/foo-*", "asdf://anywhere.org/tags/foo-4.9", True), ("asdf://*/tags/foo-*", "asdf://anywhere.org/tags/bar-4.9", False), + ("asdf://*/tags/foo-*", "asdf://somewhere.org/tags/foo-extras/bar-4.9", False), + ("asdf://**/*-1.0", "asdf://somewhere.org/tags/foo-1.0", True), + ("asdf://**/*-1.0", "asdf://somewhere.org/tags/foo-2.0", False), + ("asdf://**/*-1.0", "asdf://somewhere.org/tags/foo-extras/bar-1.0", True), + ("asdf://**/*-1.0", "asdf://somewhere.org/tags/foo-extras/bar-2.0", False), ("asdf://somewhere.org/tags/foo-*", None, False), + ("**", None, False), ]) def test_uri_match(pattern, uri, result): assert util.uri_match(pattern, uri) is result diff --git a/asdf/util.py b/asdf/util.py index 926b61b40..bb09f760d 100644 --- a/asdf/util.py +++ b/asdf/util.py @@ -28,7 +28,7 @@ __all__ = ['human_list', 'get_array_base', 'get_base_uri', 'filepath_to_url', 'iter_subclasses', 'calculate_padding', 'resolve_name', 'NotSet', - 'is_primitive'] + 'is_primitive', 'uri_match'] def human_list(l, separator="and"): @@ -456,12 +456,16 @@ def is_primitive(value): def uri_match(pattern, uri): """ Determine if a URI matches a URI pattern with possible - wildcards. + wildcards. The two recognized wildcards: + + "*": match any character except / + + "**": match any character Parameters ---------- pattern : str - URI pattern with * wildcards. + URI pattern. uri : str URI to check against the pattern. @@ -474,7 +478,7 @@ def uri_match(pattern, uri): return False if "*" in pattern: - return _compile_uri_match_pattern(pattern).match(uri) is not None + return _compile_uri_match_pattern(pattern).fullmatch(uri) is not None else: return pattern == uri @@ -483,5 +487,8 @@ def uri_match(pattern, uri): def _compile_uri_match_pattern(pattern): # Escape the pattern in case it contains regex special characters # ('.' in particular is common in URIs) and then replace the - # escaped asterisk with a .* regex matcher. - return re.compile(re.escape(pattern).replace(r"\*", ".*")) + # escaped asterisks with the appropriate regex matchers. + pattern = re.escape(pattern) + pattern = pattern.replace(r"\*\*", r".*") + pattern = pattern.replace(r"\*", r"[^/]*") + return re.compile(pattern)