From 32e3cce37a0d528ca5f0292382e0689195191f11 Mon Sep 17 00:00:00 2001
From: Joe Naegele
Date: Mon, 16 Dec 2024 20:43:47 +0000
Subject: [PATCH] Add option to download all test data concurrently

---
 .github/workflows/pingvin-ci.yml |   2 +-
 conda/run_test.sh                |   2 +-
 justfile                         |   2 +-
 test/e2e/conftest.py             | 352 ++++++++++++++++++++++++++++++-
 test/e2e/test_e2e.py             | 322 ++--------------------------
 5 files changed, 358 insertions(+), 322 deletions(-)

diff --git a/.github/workflows/pingvin-ci.yml b/.github/workflows/pingvin-ci.yml
index 0c0a8c62..4209f09e 100644
--- a/.github/workflows/pingvin-ci.yml
+++ b/.github/workflows/pingvin-ci.yml
@@ -53,7 +53,7 @@ jobs:
           test_command=". /opt/conda/etc/profile.d/conda.sh \
             && conda activate pingvin \
             && cd /opt/e2e-test/ \
-            && pytest --echo-log-on-failure"
+            && pytest --download-all --echo-log-on-failure"
           docker run --rm --gpus=all --entrypoint /bin/bash "${image_name}" -c "$test_command"
 
   publish-docker-images:
diff --git a/conda/run_test.sh b/conda/run_test.sh
index 3384a7ef..441fbb2c 100755
--- a/conda/run_test.sh
+++ b/conda/run_test.sh
@@ -6,4 +6,4 @@
 cd test/e2e/
 test -d cases
 test -f conftest.py
-pytest
\ No newline at end of file
+pytest --download-all
\ No newline at end of file
diff --git a/justfile b/justfile
index a276adf4..fafb536d 100644
--- a/justfile
+++ b/justfile
@@ -22,4 +22,4 @@ unit-test: build
     cd build && ctest
 
 e2e-test: install
-    cd test/e2e && pytest
+    cd test/e2e && pytest --download-all
diff --git a/test/e2e/conftest.py b/test/e2e/conftest.py
index 4a144afc..8fb0a3cf 100644
--- a/test/e2e/conftest.py
+++ b/test/e2e/conftest.py
@@ -1,12 +1,23 @@
 #!/usr/bin/env python3
 
-import pytest
-import os
+from __future__ import annotations
+
+import concurrent.futures
 import glob
+import hashlib
+import os
+import pytest
+import re
 import shutil
+import socket
+import subprocess
+import urllib.error
+import urllib.request
+import yaml
 
 from pathlib import Path
-from typing import List, Set
+from dataclasses import dataclass, field
+from typing import Dict, List, Callable, Set, Any
 
 
 def pytest_exception_interact(node, call, report):
@@ -34,7 +45,7 @@ def pytest_addoption(parser):
     )
     parser.addoption(
         '--cache-disable', action='store_true', default=False,
-        help='Disables local caching of input files.'
+        help='Disables local caching of data files.'
     )
     parser.addoption(
         '--cache-path', action='store', default=os.path.join(os.path.dirname(__file__), "data"),
@@ -56,32 +67,355 @@ def pytest_addoption(parser):
         '--save-results', action='store', default="",
         help='Save Pingvin output and logs in the specified directory.'
    )
+    parser.addoption(
+        '--download-all', action='store_true', default=False,
+        help='Download all test data files before running any tests.'
+    )
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def data_host_url(request) -> str:
     return request.config.getoption('--data-host')
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def cache_disable(request) -> bool:
     return request.config.getoption('--cache-disable')
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def cache_path(request, cache_disable) -> Path:
     if cache_disable:
         return None
     return Path(os.path.abspath(request.config.getoption('--cache-path')))
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def ignore_requirements(request) -> Set[str]:
     reqs = request.config.getoption('--ignore-requirements')
     if not reqs:
         return set()
     return set(reqs)
 
 
-@pytest.fixture
+@pytest.fixture(scope="session")
 def run_tags(request) -> Set[str]:
     tags = request.config.getoption('--tags')
     if not tags:
         return set()
     return set(tags)
+
+
+@pytest.fixture(scope="session")
+def pingvin_capabilities() -> Dict[str,str]:
+    command = ["pingvin", "--info"]
+    res = subprocess.run(command, capture_output=True, text=True)
+    if res.returncode != 0:
+        pytest.fail(f"Failed to query Pingvin capabilities... {res.args} returned {res.returncode}")
+
+    pingvin_info = res.stderr
+
+    value_pattern = r"(?:\s*):(?:\s+)(?P<value>.*)?"
+
+    capability_markers = {
+        'version': "Version",
+        'build': "Git SHA1",
+        'memory': "System Memory size",
+        'cuda_support': "CUDA Support",
+        'cuda_devices': "CUDA Device count"
+    }
+
+    plural_capability_markers = {
+        'cuda_memory': "CUDA Device Memory size",
+    }
+
+    def find_value(marker):
+        pattern = re.compile(marker + value_pattern, re.IGNORECASE)
+        match = pattern.search(pingvin_info)
+        if match:
+            return match['value']
+        else:
+            return None
+
+    def find_plural_values(marker):
+        pattern = re.compile(marker + value_pattern, re.IGNORECASE)
+        return [match['value'] for match in pattern.finditer(pingvin_info)]
+
+    capabilities = {key: find_value(marker) for key, marker in capability_markers.items()}
+    capabilities.update({key: find_plural_values(marker) for key, marker in plural_capability_markers.items()})
+
+    print(f"Pingvin capabilities: {capabilities}")
+    return capabilities
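The pingvin_capabilities fixture above scrapes values out of `pingvin --info` by pairing each capability marker with value_pattern. A minimal sketch of that extraction, assuming info lines shaped roughly like `System Memory size    : 63736 MB` (hypothetical output, not captured from a real run):

    import re

    info = "System Memory size    : 63736 MB\nCUDA Support          : YES\n"
    value_pattern = r"(?:\s*):(?:\s+)(?P<value>.*)?"

    # Same pattern construction as find_value() in the fixture above
    match = re.search("System Memory size" + value_pattern, info, re.IGNORECASE)
    print(match['value'])  # -> '63736 MB'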
+
+
+def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
+    """Dynamically generates a test for each test case file"""
+    all_test_specs = []
+    for filename in glob.glob('cases/*.yml'):
+        spec = Spec.fromfile(filename)
+        all_test_specs.append(spec)
+    all_test_specs = sorted(all_test_specs, key=lambda s: s.id())
+    metafunc.parametrize('spec', all_test_specs, ids=lambda s: s.id())
+
+    if metafunc.config.getoption("--download-all"):
+        if metafunc.config.getoption("--cache-disable"):
+            pytest.fail("Cannot download all data files when caching is disabled")
+        all_test_data = {}
+        for spec in all_test_specs:
+            all_test_data.update(spec.test_data_files())
+        host_url = metafunc.config.getoption("--data-host")
+        local_dir = metafunc.config.getoption("--cache-path")
+        Fetcher(host_url, local_dir).fetch(all_test_data)
+
+
+@pytest.fixture
+def check_requirements(spec: Spec, pingvin_capabilities: Dict[str,str], ignore_requirements: Set[str], run_tags: Set[str]):
+    """Checks whether each test case should be run based on Pingvin capabilities and test tags"""
+
+    # Check tags first
+    if len(run_tags) > 0 and spec.tags != run_tags:
+        pytest.skip("Test missing required tags")
+    if 'skip' in spec.tags:
+        pytest.skip("Test was marked as skipped")
+
+    # Then check requirements
+    def rules_from_reqs(section):
+        class Rule:
+            def __init__(self, capability, validator, message):
+                self.capability = capability
+                self.validator = validator
+                self.message = message
+
+            def is_satisfied(self, capabilities):
+                value = capabilities.get(self.capability)
+                return self.validator(value)
+
+        def parse_memory(string):
+            pattern = re.compile(r"(?P<value>\d+)(?: MB)?")
+            match = pattern.search(string)
+            return float(match['value'])
+
+        def is_enabled(value):
+            return value in ['YES', 'yes', 'True', 'true', '1']
+
+        def has_more_than(target):
+            return lambda value: value is not None and parse_memory(str(target)) <= parse_memory(value)
+
+        def is_positive(value):
+            return value is not None and int(value) > 0
+
+        def each(validator):
+            return lambda values: all([validator(value) for value in values])
+
+        rules = [
+            ('system_memory', lambda req: Rule('memory', has_more_than(req), "Not enough system memory.")),
+            ('gpu_support', lambda req: Rule('cuda_support', is_enabled, "CUDA support required.")),
+            ('gpu_support', lambda req: Rule('cuda_devices', is_positive, "Not enough CUDA devices.")),
+            ('gpu_memory', lambda req: Rule('cuda_memory', each(has_more_than(req)), "Not enough graphics memory."))
+        ]
+
+        return [(key, rule(section[key])) for key, rule in rules if key in section]
+
+    rules = rules_from_reqs(spec.requirements)
+    for _, rule in rules:
+        if rule.capability in ignore_requirements:
+            continue
+        if not rule.is_satisfied(pingvin_capabilities):
+            pytest.skip(rule.message)
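To make the rule table concrete: a case declaring `requirements: {gpu_support: true, gpu_memory: 6000}` is skipped unless CUDA support is reported, at least one CUDA device is present, and every reported device has at least 6000 MB of memory. A rough standalone restatement of that check, using hypothetical capability values rather than output from a real machine:

    import re

    def parse_memory(string):
        # Same idea as the nested helper in check_requirements above
        return float(re.search(r"(?P<value>\d+)(?: MB)?", string)['value'])

    capabilities = {'cuda_support': 'YES', 'cuda_devices': '1', 'cuda_memory': ['12044 MB']}
    gpu_memory_required = 6000  # hypothetical requirement from a case file

    satisfied = (
        capabilities['cuda_support'] in ['YES', 'yes', 'True', 'true', '1']
        and int(capabilities['cuda_devices']) > 0
        and all(parse_memory(m) >= parse_memory(str(gpu_memory_required))
                for m in capabilities['cuda_memory'])
    )
    print(satisfied)  # True for this hypothetical machine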
+
+
+@pytest.fixture
+def local_test_data_path(cache_path: Path, tmp_path: Path):
+    # If cache_path is disabled, the fetched data will live in the test working directory (tmp_path)
+    # PyTest automatically cleans up these directories after 3 runs
+    def _local_test_data_path(filename: str) -> str:
+        if not cache_path:
+            return os.path.join(tmp_path, filename)
+        return os.path.join(cache_path, filename)
+    return _local_test_data_path
+
+
+@pytest.fixture
+def fetch_test_data(data_host_url: str, local_test_data_path) -> Callable:
+    """Fetch test data for an individual test case"""
+    def _fetch_test_data(test_files: Dict[str,str]):
+        local_dir = local_test_data_path("")
+        Fetcher(data_host_url, local_dir).fetch(test_files)
+    return _fetch_test_data
+
+
+class Fetcher:
+    """Concurrently fetches test data from a remote host."""
+    def __init__(self, host_url: str, local_dir: Path):
+        self.host_url = host_url
+        self.local_dir = local_dir
+
+    def fetch(self, test_files: Dict[str,str]) -> List[str]:
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            results = list(executor.map(self.download_item, test_files.items()))
+        return results
+
+    def download_item(self, item):
+        """Fetches test data from the remote data host and caches it locally"""
+        filename, checksum = item
+
+        destination = os.path.join(self.local_dir, filename)
+        need_to_fetch = True
+        if os.path.exists(destination):
+            if not os.path.isfile(destination):
+                raise RuntimeError(f"Destination '{destination}' exists but is not a file")
+
+            if not self.is_valid(destination, checksum):
+                print(f"Destination '{destination}' exists but its checksum does not match... Forcing download")
+            else:
+                need_to_fetch = False
+
+        if need_to_fetch:
+            print(f"Fetching test data: {filename}")
+            os.makedirs(os.path.dirname(destination), exist_ok=True)
+            url = f"{self.host_url}{filename}"
+            self.urlretrieve(url, destination)
+
+            if not self.is_valid(destination, checksum):
+                raise RuntimeError(f"Downloaded file '{destination}' does not match checksum")
+        return destination
+
+    def is_valid(self, file: Path, digest: str) -> bool:
+        if not os.path.isfile(file):
+            return False
+        def compute_checksum(file: Path) -> str:
+            md5 = hashlib.new('md5')
+            with open(file, 'rb') as f:
+                for chunk in iter(lambda: f.read(65536), b''):
+                    md5.update(chunk)
+            return md5.hexdigest()
+        return digest == compute_checksum(file)
+
+    def urlretrieve(self, url: str, filename: str, retries: int = 5) -> str:
+        if retries <= 0:
+            raise RuntimeError("Download from {} failed".format(url))
+        try:
+            with urllib.request.urlopen(url, timeout=60) as connection:
+                with open(filename, 'wb') as f:
+                    for chunk in iter(lambda: connection.read(1024*1024), b''):
+                        f.write(chunk)
+                return connection.headers["Content-MD5"]
+        except (urllib.error.URLError, ConnectionResetError, socket.timeout) as exc:
+            print("Retrying connection for file {}, reason: {}".format(filename, str(exc)))
+            return self.urlretrieve(url, filename, retries=retries-1)
+
+
+@dataclass
+class Spec():
+    """Defines a test case specification"""
+
+    @dataclass
+    class Job():
+        """Defines a job to be run by Pingvin"""
+        name: str
+        datafile: str
+        checksum: str
+        args: List[str]
+
+        @staticmethod
+        def fromdict(config: Dict[str, str], name: str) -> Spec.Job:
+            if not config:
+                return None
+
+            datafile = config['data']
+            if not datafile:
+                raise ValueError(f"Missing 'data' key in {name} configuration")
+
+            checksum = config['checksum']
+            if not checksum:
+                raise ValueError(f"Missing 'checksum' key in {name} configuration")
+
+            args = []
+            if 'run' in config:
+                for run in config['run']:
+                    args.append(run['args'])
+            else:
+                args.append(config['args'])
+
+            return Spec.Job(name=name, datafile=datafile, checksum=checksum, args=args)
+
+    @dataclass
+    class ImageSeriesTest():
+        """Defines a test for an image series comparison"""
+        image_series: int
+        scale_comparison_threshold: float
+        value_comparison_threshold: float
+
+    @dataclass
+    class Validation():
+        """Defines a validation test for the output of a job"""
+        reference: str
+        checksum: str
+        image_series_tests: List[ImageSeriesTest]
+
+        @staticmethod
+        def fromdict(config: Dict[str, str]) -> Spec.Validation:
+            if not config:
+                return None
+            reference = config['reference']
+            if not reference:
+                raise ValueError("Missing 'reference' key in validation configuration")
+            checksum = config['checksum']
+            if not checksum:
+                raise ValueError("Missing 'checksum' key in validation configuration")
+            tests = config['tests']
+            if not tests:
+                raise ValueError("Missing 'tests' key in validation configuration")
+            if not isinstance(tests, list):
+                raise ValueError("Key 'tests' should be a list in validation configuration")
+
+            image_series_tests = []
+            for test in tests:
+                num = test['image_series']
+                st = test.get('scale_comparison_threshold', 0.01)
+                vt = test.get('value_comparison_threshold', 0.01)
+                image_series_tests.append(
+                    Spec.ImageSeriesTest(image_series=num, scale_comparison_threshold=st, value_comparison_threshold=vt)
+                )
+
+            return Spec.Validation(reference=reference, checksum=checksum,
+                                   image_series_tests=image_series_tests)
+
+    name: str
+    tags: Set[str] = field(default_factory=set)
+    requirements: Dict[str, str] = field(default_factory=dict)
+
+    dependency: Spec.Job = None
+    reconstruction: Spec.Job = None
+    validation: Spec.Validation = None
+
+    def id(self):
+        return f"{self.name}"
+
+    def test_data_files(self):
+        files = {}
+        if self.dependency is not None:
+            files[self.dependency.datafile] = self.dependency.checksum
+        files[self.reconstruction.datafile] = self.reconstruction.checksum
+        files[self.validation.reference] = self.validation.checksum
+        return files
+
+    @staticmethod
+    def fromfile(filename: str) -> Spec:
+        with open(filename, 'r') as file:
+            parsed = yaml.safe_load(file)
+        name = os.path.relpath(filename)
+        spec = Spec(name=name)
+
+        tags = parsed.get('tags', None)
+        if not tags:
+            tags = []
+        if not isinstance(tags, list):
+            tags = [tags]
+        spec.tags = set(tags)
+
+        requirements = parsed.get('requirements', None)
+        if not requirements:
+            requirements = {}
+        if not isinstance(requirements, dict):
+            raise ValueError(f"Invalid requirements in {filename}")
+        spec.requirements = requirements
+
+        spec.dependency = Spec.Job.fromdict(parsed.get('dependency', None), 'dependency')
+        spec.reconstruction = Spec.Job.fromdict(parsed['reconstruction'], 'reconstruction')
+        spec.validation = Spec.Validation.fromdict(parsed.get('validation', None))
+
+        return spec
\ No newline at end of file
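Spec.fromfile, as added above, expects each cases/*.yml file to provide at least a reconstruction job; the new test_data_files() method is what lets --download-all map every referenced data file to its checksum up front. A minimal sketch of that round trip with an entirely hypothetical case (paths, args, and checksums are placeholders, not real test data), assuming test/e2e is the working directory:

    import tempfile
    import yaml

    from conftest import Spec  # conftest.py as modified by this patch

    case = {
        'tags': ['hypothetical'],
        'reconstruction': {
            'data': 'hypothetical/input.mrd',
            'checksum': '0123456789abcdef0123456789abcdef',
            'args': '(placeholder Pingvin arguments)',
        },
        'validation': {
            'reference': 'hypothetical/reference.mrd',
            'checksum': 'fedcba9876543210fedcba9876543210',
            'tests': [{'image_series': 0}],
        },
    }

    # Write the case to a temporary .yml file and parse it the way the test suite does
    with tempfile.NamedTemporaryFile('w', suffix='.yml', delete=False) as f:
        yaml.safe_dump(case, f)

    spec = Spec.fromfile(f.name)
    print(spec.test_data_files())
    # -> {'hypothetical/input.mrd': '0123...', 'hypothetical/reference.mrd': 'fedc...'}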
diff --git a/test/e2e/test_e2e.py b/test/e2e/test_e2e.py
index 67d4caf9..b634a56c 100644
--- a/test/e2e/test_e2e.py
+++ b/test/e2e/test_e2e.py
@@ -1,43 +1,25 @@
 from __future__ import annotations
 
-import re
 import os
-import glob
+import mrd
 import pytest
-import hashlib
 import itertools
 import subprocess
-import yaml
-
-import mrd
-
 import numpy
 import numpy.typing as npt
 
-import socket
-import urllib.error
-import urllib.request
-
-from dataclasses import dataclass, field
 from typing import Dict, List, Callable, Set, Any
 
 
-all_test_specs = []
-pingvin_capabilities = None
+def test_e2e(spec, check_requirements, request, fetch_test_data, process_data, validate_output):
+    """The main test function for each test case.
+    One instance is created for each case file in ./cases/.
+    These instances and related fixtures are defined in conftest.py.
+    """
+    if not request.config.getoption("--download-all"):
+        fetch_test_data(spec.test_data_files())
 
-def pytest_generate_tests(metafunc: pytest.Metafunc) -> None:
-    """Dynamically generates a test for each test case file"""
-    global pingvin_capabilities
-    pingvin_capabilities = load_pingvin_capabilities()
-    print(f"Pingvin capabilities: {pingvin_capabilities}")
-    for spec in load_test_cases():
-        all_test_specs.append(spec)
-
-
-@pytest.mark.parametrize('spec', all_test_specs, ids=lambda s: s.id())
-def test_e2e(spec, check_requirements, process_data, validate_output):
-    """The main test function for each test case"""
+
     if spec.dependency is not None:
         process_data(spec.dependency)
     output_file = process_data(spec.reconstruction)
@@ -45,100 +27,10 @@
 
 
 @pytest.fixture
-def check_requirements(spec: Spec, ignore_requirements: Set[str], run_tags: Set[str]):
-    """Checks whether each test case should be run based on Pingvin capabilities and test tags"""
-
-    # Check tags first
-    if len(run_tags) > 0 and spec.tags != run_tags:
-        pytest.skip("Test missing required tags")
-    if 'skip' in spec.tags:
-        pytest.skip("Test was marked as skipped")
-
-    # Then check requirements
-    def rules_from_reqs(section):
-        class Rule:
-            def __init__(self, capability, validator, message):
-                self.capability = capability
-                self.validator = validator
-                self.message = message
-
-            def is_satisfied(self, capabilities):
-                value = capabilities.get(self.capability)
-                return self.validator(value)
-
-        def parse_memory(string):
-            pattern = re.compile(r"(?P<value>\d+)(?: MB)?")
-            match = pattern.search(string)
-            return float(match['value'])
-
-        def is_enabled(value):
-            return value in ['YES', 'yes', 'True', 'true', '1']
-
-        def has_more_than(target):
-            return lambda value: value is not None and parse_memory(str(target)) <= parse_memory(value)
-
-        def is_positive(value):
-            return value is not None and int(value) > 0
-
-        def each(validator):
-            return lambda values: all([validator(value) for value in values])
-
-        rules = [
-            ('system_memory', lambda req: Rule('memory', has_more_than(req), "Not enough system memory.")),
-            ('gpu_support', lambda req: Rule('cuda_support', is_enabled, "CUDA support required.")),
-            ('gpu_support', lambda req: Rule('cuda_devices', is_positive, "Not enough CUDA devices.")),
-            ('gpu_memory', lambda req: Rule('cuda_memory', each(has_more_than(req)), "Not enough graphics memory."))
-        ]
-
-        return [(key, rule(section[key])) for key, rule in rules if key in section]
-
-    rules = rules_from_reqs(spec.requirements)
-    for _, rule in rules:
-        if rule.capability in ignore_requirements:
-            continue
-        if not rule.is_satisfied(pingvin_capabilities):
-            pytest.skip(rule.message)
-
-@pytest.fixture
-def fetch_test_data(cache_path: Path, data_host_url: str, tmp_path: Path) -> Callable:
-    """Fetches test data from the remote data host and caches it locally"""
-    # If cache_path is disabled, the fetched data will live in the test working directory (tmp_path)
-    # PyTest automatically cleans up these directories after 3 runs
-    def _fetch_test_data(filename: str, checksum: str) -> str:
-        if not cache_path:
-            destination = os.path.join(tmp_path, filename)
-        else:
-            destination = os.path.join(cache_path, filename)
-
-        need_to_fetch = True
-        if os.path.exists(destination):
-            if not os.path.isfile(destination):
-                pytest.fail(f"Destination '{destination}' exists but is not a file")
-
-            if not is_valid(destination, checksum):
-                print(f"Destination '{destination}' exists file but checksum does not match... Forcing download")
-            else:
-                need_to_fetch = False
-
-        if need_to_fetch:
-            print(f"Fetching test data: {filename}")
-            os.makedirs(os.path.dirname(destination), exist_ok=True)
-            url = f"{data_host_url}{filename}"
-            urlretrieve(url, destination)
-
-            if not is_valid(destination, checksum):
-                pytest.fail(f"Downloaded file '{destination}' does not match checksum")
-
-        return destination
-
-    return _fetch_test_data
-
-
-@pytest.fixture
-def process_data(fetch_test_data, tmp_path):
+def process_data(local_test_data_path, tmp_path):
     """Runs the Pingvin on the input test data, producing an output file."""
     def _process_data(job):
-        input_file = fetch_test_data(job.datafile, job.checksum)
+        input_file = local_test_data_path(job.datafile)
         output_file = os.path.join(tmp_path, job.name + ".output.mrd")
 
         invocations = []
@@ -165,11 +57,10 @@ def _process_data(job):
     return _process_data
 
 
 @pytest.fixture
-def validate_output(fetch_test_data):
+def validate_output(local_test_data_path):
     """Validates each image (data and header) in the output file against the reference file."""
     def _validate_output(spec: Spec.Validation, output_file: str) -> None:
-        reference_file = fetch_test_data(spec.reference, spec.checksum)
-
+        reference_file = local_test_data_path(spec.reference)
         reference_images = extract_image_data(reference_file)
         output_images = extract_image_data(output_file)
@@ -185,80 +76,6 @@ def _validate_output(spec: Spec.Validation, output_file: str) -> None:
 
     return _validate_output
 
-
-def load_pingvin_capabilities() -> Dict[str, str]:
-    command = ["pingvin", "--info"]
-    res = subprocess.run(command, capture_output=True, text=True)
-    if res.returncode != 0:
-        pytest.fail(f"Failed to query Pingvin capabilities... {res.args} return {res.returncode}")
-
-    pingvin_info = res.stderr
-
-    value_pattern = r"(?:\s*):(?:\s+)(?P<value>.*)?"
-
-    capability_markers = {
-        'version': "Version",
-        'build': "Git SHA1",
-        'memory': "System Memory size",
-        'cuda_support': "CUDA Support",
-        'cuda_devices': "CUDA Device count"
-    }
-
-    plural_capability_markers = {
-        'cuda_memory': "CUDA Device Memory size",
-    }
-
-    def find_value(marker):
-        pattern = re.compile(marker + value_pattern, re.IGNORECASE)
-        match = pattern.search(pingvin_info)
-        if match:
-            return match['value']
-        else:
-            return None
-
-    def find_plural_values(marker):
-        pattern = re.compile(marker + value_pattern, re.IGNORECASE)
-        return [match['value'] for match in pattern.finditer(pingvin_info)]
-
-    capabilities = {key: find_value(marker) for key, marker in capability_markers.items()}
-    capabilities.update({key: find_plural_values(marker) for key, marker in plural_capability_markers.items()})
-
-    return capabilities
-
-
-def load_test_cases() -> List[Dict[str, str]]:
-    specs = []
-    for filename in glob.glob('cases/*.yml'):
-        spec = Spec.fromfile(filename)
-        specs.append(spec)
-    return sorted(specs, key=lambda s: s.id())
-
-
-def checksum(file: Path) -> str:
-    md5 = hashlib.new('md5')
-    with open(file, 'rb') as f:
-        for chunk in iter(lambda: f.read(65536), b''):
-            md5.update(chunk)
-    return md5.hexdigest()
-
-def is_valid(file: Path, digest: str) -> bool:
-    if not os.path.isfile(file):
-        return False
-    return digest == checksum(file)
-
-def urlretrieve(url: str, filename: str, retries: int = 5) -> str:
-    if retries <= 0:
-        pytest.fail("Download from {} failed".format(url))
-    try:
-        with urllib.request.urlopen(url, timeout=60) as connection:
-            with open(filename, 'wb') as f:
-                for chunk in iter(lambda: connection.read(1024*1024), b''):
-                    f.write(chunk)
-            return connection.headers["Content-MD5"]
-    except (urllib.error.URLError, ConnectionResetError, socket.timeout) as exc:
-        print("Retrying connection for file {}, reason: {}".format(filename, str(exc)))
-        return urlretrieve(url, filename, retries=retries-1)
-
 def extract_image_data(filename: Path) -> Dict[int, Dict[str, Any]]:
     dataset = {}
     with mrd.BinaryMrdReader(filename) as reader:
@@ -340,118 +157,3 @@ def each(rule):
                 pytest.fail(f"Image header '{attribute}' does not match reference"
                             f" (series {output_header.image_series_index}, index {output_header.image_index})"
                             f" [{getattr(output_header, attribute)} != {getattr(reference_header, attribute)}]")
-
-
-@dataclass
-class Spec():
-    """Defines a test case specification"""
-
-    @dataclass
-    class Job():
-        """Defines a job to be run by Pingvin"""
-        name: str
-        datafile: str
-        checksum: str
-        args: List[str]
-
-        @staticmethod
-        def fromdict(config: Dict[str, str], name: str) -> Spec.Job:
-            if not config:
-                return None
-
-            datafile = config['data']
-            if not datafile:
-                raise ValueError(f"Missing 'data' key in {name} configuration")
-
-            checksum = config['checksum']
-            if not checksum:
-                raise ValueError(f"Missing 'checksum' key in {name} configuration")
-
-            args = []
-            if 'run' in config:
-                for run in config['run']:
-                    args.append(run['args'])
-            else:
-                args.append(config['args'])
-
-            return Spec.Job(name=name, datafile=datafile, checksum=checksum, args=args)
-
-    @dataclass
-    class ImageSeriesTest():
-        """Defines a test for an image series comparison"""
-        image_series: int
-        scale_comparison_threshold: float
-        value_comparison_threshold: float
-
-    @dataclass
-    class Validation():
-        """Defines a validation test for the output of a job"""
-        reference: str
-        checksum: str
-        image_series_tests: List[ImageSeriesTest]
-
-        @staticmethod
-        def fromdict(config: Dict[str, str]) -> Spec.Validation:
-            if not config:
-                return None
-            reference = config['reference']
-            if not reference:
-                raise ValueError("Missing 'reference' key in validation configuration")
-            checksum = config['checksum']
-            if not checksum:
-                raise ValueError("Missing 'checksum' key in validation configuration")
-            tests = config['tests']
-            if not tests:
-                raise ValueError("Missing 'tests' key in validation configuration")
-            if not isinstance(tests, list):
-                raise ValueError("Key 'tests' should be a list in validation configuration")
-
-            image_series_tests = []
-            for test in tests:
-                num = test['image_series']
-                st = test.get('scale_comparison_threshold', 0.01)
-                vt = test.get('value_comparison_threshold', 0.01)
-                image_series_tests.append(
-                    Spec.ImageSeriesTest(image_series=num, scale_comparison_threshold=st, value_comparison_threshold=vt)
-                )
-
-            return Spec.Validation(reference=reference, checksum=checksum,
-                                   image_series_tests=image_series_tests)
-
-    name: str
-    tags: Set[str] = field(default_factory=set)
-    requirements: Dict[str, str] = field(default_factory=dict)
-
-    dependency: Spec.Job = None
-    reconstruction: Spec.Job = None
-    validation: Spec.Validation = None
-
-    def id(self):
-        return f"{self.name}"
-
-    @staticmethod
-    def fromfile(filename: str) -> Spec:
-        with open(filename, 'r') as file:
-            parsed = yaml.safe_load(file)
-        name = os.path.relpath(filename)
-        spec = Spec(name=name)
-
-        tags = parsed.get('tags', None)
-        if not tags:
-            tags = []
-        if not isinstance(tags, list):
-            tags = [tags]
-        spec.tags = set(tags)
-
-        requirements = parsed.get('requirements', None)
-        if not requirements:
-            requirements = {}
-        if not isinstance(requirements, dict):
-            raise ValueError(f"Invalid requirements in {filename}")
-        spec.requirements = requirements
-
-        spec.dependency = Spec.Job.fromdict(parsed.get('dependency', None), 'dependency')
-        spec.reconstruction = Spec.Job.fromdict(parsed['reconstruction'], 'reconstruction')
-        spec.validation = Spec.Validation.fromdict(parsed.get('validation', None))
-
-        return spec
\ No newline at end of file
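With this patch applied, --download-all moves data fetching out of the individual tests and into a single concurrent prefetch during collection. The same prefetch can be reproduced outside pytest by reusing the pieces the patch adds to conftest.py; a rough sketch, assuming test/e2e is the working directory and that the hypothetical MRD_TEST_DATA_HOST variable holds the same URL you would pass to --data-host:

    import glob
    import os

    from conftest import Fetcher, Spec  # conftest.py as modified by this patch

    data_host = os.environ['MRD_TEST_DATA_HOST']  # hypothetical variable; use your --data-host URL
    cache_dir = 'data'                            # the default --cache-path next to conftest.py

    # Collect {datafile: checksum} for every case, exactly as pytest_generate_tests does
    all_files = {}
    for filename in sorted(glob.glob('cases/*.yml')):
        all_files.update(Spec.fromfile(filename).test_data_files())

    # Download (or validate cached copies of) everything concurrently
    Fetcher(data_host, cache_dir).fetch(all_files)
    print(f"{len(all_files)} data files cached under {cache_dir}/")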