diff --git a/.gitignore b/.gitignore index bd139eea0..3ec5e7e5f 100644 --- a/.gitignore +++ b/.gitignore @@ -202,6 +202,7 @@ cython_debug/ !dev-requirements.txt !mkdocs-requirements.txt !src/pynxtools/nexus-version.txt +!src/pynxtools/remote_definitions_url.txt build/ nexusparser.egg-info/PKG-INFO .python-version diff --git a/MANIFEST.in b/MANIFEST.in index 7b8255ac3..0d610492b 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -9,5 +9,5 @@ recursive-include src/pynxtools/definitions/applications/ *.xml recursive-include src/pynxtools/definitions/contributed_definitions/ *.xml include src/pynxtools/definitions/*.xsd include src/pynxtools/nexus-version.txt -include src/pynxtools/definitions/NXDL_VERSION - +include src/pynxtools/remote_definitions_url.txt +include src/pynxtools/definitions/NXDL_VERSION \ No newline at end of file diff --git a/src/pynxtools/__init__.py b/src/pynxtools/__init__.py index fb01ad631..f7db586be 100644 --- a/src/pynxtools/__init__.py +++ b/src/pynxtools/__init__.py @@ -105,3 +105,10 @@ def get_nexus_version_hash() -> str: return MAIN_BRANCH_NAME return version.group(1) + + +def get_definitions_url() -> str: + """Get the URL of the NeXus definitions that are submoduled in pynxtools.""" + url_file = os.path.join(os.path.dirname(__file__), "remote_definitions_url.txt") + with open(url_file, encoding="utf-8") as file: + return file.read().strip() diff --git a/src/pynxtools/_build_wrapper.py b/src/pynxtools/_build_wrapper.py index 6f94553a0..4317d9582 100644 --- a/src/pynxtools/_build_wrapper.py +++ b/src/pynxtools/_build_wrapper.py @@ -56,13 +56,61 @@ def _write_version_to_metadata(): file.write(version) +def get_definitions_submodule_url(): + """ + The URL of the definitions submodule in pynxtools. + """ + submodule_path = "src/pynxtools/definitions" + + try: + # Define the command to run + url_line = run( + [ + "git", + "config", + "--file", + ".git/config", + "--get-regexp", + f"^submodule\\.{submodule_path}\\.url", + ], + text=True, + capture_output=True, + check=True, + ).stdout.strip() + + if url_line: + url = url_line.split(" ")[1] + return url + else: + return None + + except (FileNotFoundError, CalledProcessError): + return None + + +def _write_definitions_remote_url(): + """Write the URL of the definitions remote to file.""" + remote_repo_url = get_definitions_submodule_url() + if remote_repo_url is None or not remote_repo_url: + return + + with open( + os.path.join(os.path.dirname(__file__), "remote_definitions_url.txt"), + "w+", + encoding="utf-8", + ) as file: + file.write(remote_repo_url) + + # pylint: disable=function-redefined def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): """ PEP 517 compliant build wheel hook. - This is a wrapper for setuptools and adds a nexus version file. + This is a wrapper for setuptools and adds a nexus version file and a + file with the remote of the definitions submodule. """ _write_version_to_metadata() + _write_definitions_remote_url() return _orig.build_wheel(wheel_directory, config_settings, metadata_directory) @@ -70,7 +118,9 @@ def build_wheel(wheel_directory, config_settings=None, metadata_directory=None): def build_sdist(sdist_directory, config_settings=None): """ PEP 517 compliant build sdist hook. - This is a wrapper for setuptools and adds a nexus version file. + This is a wrapper for setuptools and adds a nexus version file and a + file with the remote of the definitions submodule. """ _write_version_to_metadata() + _write_definitions_remote_url() return _orig.build_sdist(sdist_directory, config_settings) diff --git a/src/pynxtools/nomad/parser.py b/src/pynxtools/nomad/parser.py index c20c68aa9..8f26bdc61 100644 --- a/src/pynxtools/nomad/parser.py +++ b/src/pynxtools/nomad/parser.py @@ -40,13 +40,34 @@ import pynxtools.nomad.schema as nexus_schema from pynxtools.nexus.nexus import HandleNexus +__REPLACEMENT_FOR_NX = "BS" +__REPLACEMENT_LEN = len(__REPLACEMENT_FOR_NX) + + +def _rename_nx_to_nomad(name: str) -> Optional[str]: + """ + Rename the NXDL name to NOMAD. + For example: NXdata -> BSdata, + except NXobject -> NXobject + """ + if name == "NXobject": + return name + if name is not None: + if name.startswith("NX"): + return name.replace("NX", __REPLACEMENT_FOR_NX) + return name + def _to_group_name(nx_node: ET.Element): """ Normalise the given group name """ # assuming always upper() is incorrect, e.g. NXem_msr is a specific one not EM_MSR! - return nx_node.attrib.get("name", nx_node.attrib["type"][2:].upper()) + grp_nm = nx_node.attrib.get( + "name", nx_node.attrib["type"][__REPLACEMENT_LEN:].upper() + ) + + return grp_nm # noinspection SpellCheckingInspection @@ -203,7 +224,6 @@ def _populate_data( target_name=attr_name, exc_info=exc, ) - if parent_field_name in current.__dict__: quantity = current.__dict__[parent_field_name] if isinstance(quantity, dict): @@ -328,7 +348,8 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613 hdf_path: str = hdf_info["hdf_path"] hdf_node = hdf_info["hdf_node"] - + if nx_def is not None: + nx_def = _rename_nx_to_nomad(nx_def) if nx_path is None: return diff --git a/src/pynxtools/nomad/schema.py b/src/pynxtools/nomad/schema.py index a2bae966d..5fbbb6ac8 100644 --- a/src/pynxtools/nomad/schema.py +++ b/src/pynxtools/nomad/schema.py @@ -16,19 +16,31 @@ # limitations under the License. # +import json import os import os.path +import pickle import re import sys # noinspection PyPep8Naming import xml.etree.ElementTree as ET -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Union import numpy as np try: + from nomad import utils from nomad.datamodel import EntryArchive + from nomad.datamodel.metainfo.basesections import ( + BaseSection, + Component, + CompositeSystem, + Entity, + EntityReference, + Instrument, + ) + from nomad.datamodel.metainfo.eln import BasicEln from nomad.metainfo import ( Attribute, Bytes, @@ -59,7 +71,9 @@ "Could not import nomad package. Please install the package 'nomad-lab'." ) from exc +from pynxtools import get_definitions_url from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path +from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_to_nomad # __URL_REGEXP from # https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url @@ -78,10 +92,29 @@ __logger = get_logger(__name__) +__BASESECTIONS_MAP: Dict[str, Any] = { + "BSfabrication": [Instrument], + "BSsample": [CompositeSystem], + "BSsample_component": [Component], + "BSidentifier": [EntityReference], + # "BSobject": BaseSection, +} + + VALIDATE = False __XML_PARENT_MAP: Dict[ET.Element, ET.Element] -__NX_DOC_BASE = "https://manual.nexusformat.org/classes" +__NX_DOC_BASES: Dict[str, str] = { + "https://github.com/nexusformat/definitions.git": "https://manual.nexusformat.org/classes", + "https://github.com/FAIRmat-NFDI/nexus_definitions.git": "https://fairmat-nfdi.github.io/nexus_definitions/classes", +} + +__PACKAGE_NAME = "nexus" +__GROUPING_NAME = "NeXus" + +from nomad import utils + +logger_ = utils.get_logger(__name__) def get_nx_type(nx_type: str) -> Optional[Datatype]: @@ -241,9 +274,14 @@ def __get_documentation_url( if xml_node is None: break + definitions_url = get_definitions_url() + + doc_base = __NX_DOC_BASES.get( + definitions_url, "https://manual.nexusformat.org/classes" + ) nx_package = xml_parent.get("nxdl_base").split("/")[-1] anchor = "-".join([name.lower() for name in reversed(anchor_segments)]) - return f"{__NX_DOC_BASE}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" + return f"{doc_base}/{nx_package}/{anchor_segments[-1]}.html#{anchor}" def __to_section(name: str, **kwargs) -> Section: @@ -254,13 +292,15 @@ def __to_section(name: str, **kwargs) -> Section: This allows to access the metainfo section even before it is generated from the base class nexus definition. """ + + # name = __rename_nx_to_nomad(name) + if name in __section_definitions: section = __section_definitions[name] section.more.update(**kwargs) return section section = Section(validate=VALIDATE, name=name, **kwargs) - __section_definitions[name] = section return section @@ -508,7 +548,7 @@ def __create_group(xml_node: ET.Element, root_section: Section): xml_attrs = group.attrib assert "type" in xml_attrs, "Expecting type to be present" - nx_type = xml_attrs["type"] + nx_type = __rename_nx_to_nomad(xml_attrs["type"]) nx_name = xml_attrs.get("name", nx_type) group_section = Section(validate=VALIDATE, nx_kind="group", name=nx_name) @@ -516,7 +556,9 @@ def __create_group(xml_node: ET.Element, root_section: Section): __attach_base_section(group_section, root_section, __to_section(nx_type)) __add_common_properties(group, group_section) - nx_name = xml_attrs.get("name", nx_type.replace("NX", "").upper()) + nx_name = xml_attrs.get( + "name", nx_type.replace(__REPLACEMENT_FOR_NX, "").upper() + ) group_subsection = SubSection( section_def=group_section, nx_kind="group", @@ -562,13 +604,18 @@ def __create_class_section(xml_node: ET.Element) -> Section: nx_type = xml_attrs["type"] nx_category = xml_attrs["category"] + nx_name = __rename_nx_to_nomad(nx_name) class_section: Section = __to_section( nx_name, nx_kind=nx_type, nx_category=nx_category ) + nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection]) + if "extends" in xml_attrs: - base_section = __to_section(xml_attrs["extends"]) - class_section.base_sections = [base_section] + nx_base_sec = __to_section(__rename_nx_to_nomad(xml_attrs["extends"])) + class_section.base_sections = [nx_base_sec] + [ + cls.m_def for cls in nomad_base_sec_cls + ] __add_common_properties(xml_node, class_section) @@ -678,7 +725,7 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: Creates a metainfo package from the given nexus directory. Will generate the respective metainfo definitions from all the nxdl files in that directory. """ - package = Package(name="nexus") + package = Package(name=__PACKAGE_NAME) folder_list = ("base_classes", "contributed_definitions", "applications") paths = [ @@ -690,7 +737,6 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: section = __add_section_from_nxdl(nxdl_file) if section is not None: sections.append(section) - sections.sort(key=lambda x: x.name) for section in sections: @@ -705,9 +751,6 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package: nexus_metainfo_package: Optional[Package] = None # pylint: disable=C0103 -import pickle -import traceback - def save_nexus_schema(suf): nexus_metainfo_package @@ -736,7 +779,7 @@ def init_nexus_metainfo(): # We take the application definitions and create a common parent section that allows # to include nexus in an EntryArchive. - nexus_section = Section(validate=VALIDATE, name="NeXus") + nexus_section = Section(validate=VALIDATE, name=__GROUPING_NAME) # try: # load_nexus_schema('') @@ -746,7 +789,6 @@ def init_nexus_metainfo(): # save_nexus_schema('') # except Exception: # pass - nexus_metainfo_package = __create_package_from_nxdl_directories(nexus_section) EntryArchive.nexus = SubSection(name="nexus", section_def=nexus_section) @@ -783,3 +825,90 @@ def init_nexus_metainfo(): init_nexus_metainfo() + + +def normalize_BSfabrication(self, archive, logger): + """Normalizer for BSfabrication section.""" + current_cls = __section_definitions["BSfabrication"].section_cls + super(current_cls, self).normalize(archive, logger) + self.lab_id = "Hello" + + +def normalize_BSsample_component(self, archive, logger): + """Normalizer for BSsample_component section.""" + current_cls = __section_definitions["BSsample_component"].section_cls + if self.name__field: + self.name = self.name__field + if self.mass__field: + self.mass = self.mass__field + # we may want to add normalisation for mass_fraction (calculating from components) + super(current_cls, self).normalize(archive, logger) + + +def normalize_BSsample(self, archive, logger): + """Normalizer for BSsample section.""" + current_cls = __section_definitions["BSsample"].section_cls + if self.name__field: + self.name = self.name__field + # one could also copy local ids to BSidentifier for search purposes + super(current_cls, self).normalize(archive, logger) + + +def normalize_BSidentifier(self, archive, logger): + """Normalizer for BSidentifier section.""" + + def create_Entity(lab_id, archive, f_name): + entity = BasicEln() + entity.lab_id = lab_id + entity.entity = Entity() + entity.entity.lab_id = lab_id + + with archive.m_context.raw_file(f_name, "w") as f_obj: + json.dump( + {"data": entity.m_to_dict(with_meta=True, include_derived=True)}, + f_obj, + indent=4, + ) + archive.m_context.process_updated_raw_file(f_name) + + def get_entry_reference(archive, f_name): + """Returns a reference to data from entry.""" + from nomad.utils import hash + + upload_id = archive.metadata.upload_id + entry_id = hash(upload_id, f_name) + + return f"/entries/{entry_id}/archive#/data" + + current_cls = __section_definitions["BSidentifier"].section_cls + # super(current_cls, self).normalize(archive, logger) + if self.identifier__field: + logger.info(f"{self.identifier__field} - identifier received") + self.lab_id = self.identifier__field # + "__occurrence" + EntityReference.normalize(self, archive, logger) + if not self.reference: + logger.info(f"{self.lab_id} to be created") + + f_name = f"{current_cls.__name__}_{self.lab_id}.archive.json" + create_Entity(self.lab_id, archive, f_name) + self.reference = get_entry_reference(archive, f_name) + logger.info(f"{self.reference} - referenced directly") + + +__NORMALIZER_MAP: Dict[str, Any] = { + "BSfabrication": normalize_BSfabrication, + "BSsample": normalize_BSsample, + "BSsample_component": normalize_BSsample_component, + "BSidentifier": normalize_BSidentifier, +} + +# Handling nomad BaseSection and other inherited Section from BaseSection +for nx_name, section in __section_definitions.items(): + if nx_name == "NXobject": + continue + + normalize_func = __NORMALIZER_MAP.get(nx_name) + + # Append the normalize method from a function + if normalize_func: + section.section_cls.normalize = normalize_func diff --git a/src/pynxtools/nomad/utils.py b/src/pynxtools/nomad/utils.py new file mode 100644 index 000000000..203e52bc7 --- /dev/null +++ b/src/pynxtools/nomad/utils.py @@ -0,0 +1,35 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Optional + +__REPLACEMENT_FOR_NX = "BS" + + +def __rename_nx_to_nomad(name: str) -> Optional[str]: + """ + Rename the NXDL name to NOMAD. + For example: NXdata -> BSdata, + except NXobject -> NXobject + """ + if name == "NXobject": + return name + if name is not None: + if name.startswith("NX"): + return name.replace("NX", __REPLACEMENT_FOR_NX) + return name diff --git a/src/pynxtools/remote_definitions_url.txt b/src/pynxtools/remote_definitions_url.txt new file mode 100644 index 000000000..8c157e3ad --- /dev/null +++ b/src/pynxtools/remote_definitions_url.txt @@ -0,0 +1 @@ +https://github.com/FAIRmat-NFDI/nexus_definitions.git \ No newline at end of file diff --git a/tests/nexus/test_remote_definitions_url.py b/tests/nexus/test_remote_definitions_url.py new file mode 100644 index 000000000..a06bcef03 --- /dev/null +++ b/tests/nexus/test_remote_definitions_url.py @@ -0,0 +1,21 @@ +""" +Tests the version retrieval for the nexus definitions submodule +""" + +import re + +from pynxtools import get_definitions_url + +# Regex pattern to match a valid GitHub repo URL +GITHUB_URL_REGEX = r"^https://github\.com/[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+/?$" + + +def test_get_definitions_url(): + """ + Tests if we get a valid GitHub URL from the text value stored in the + remote_definitions_url.txt file. + """ + definitions_url = get_definitions_url() + + assert definitions_url is not None + assert re.match(GITHUB_URL_REGEX, definitions_url) diff --git a/tests/nomad/test_parsing.py b/tests/nomad/test_parsing.py index 2546394d9..bcf3afaae 100644 --- a/tests/nomad/test_parsing.py +++ b/tests/nomad/test_parsing.py @@ -34,42 +34,47 @@ from pynxtools.nomad.parser import NexusParser from pynxtools.nomad.schema import nexus_metainfo_package +__REPLACEMENT_FOR_NX = "BS" + @pytest.mark.parametrize( "path,value", [ pytest.param("name", "nexus"), pytest.param("NXobject.name", "NXobject"), - pytest.param("NXentry.nx_kind", "group"), - pytest.param("NXdetector.real_time__field", "*"), - pytest.param("NXentry.DATA.nx_optional", True), - pytest.param("NXentry.DATA.nx_kind", "group"), - pytest.param("NXentry.DATA.nx_optional", True), - pytest.param("NXdetector.real_time__field.name", "real_time__field"), - pytest.param("NXdetector.real_time__field.nx_type", "NX_NUMBER"), - pytest.param("NXdetector.real_time__field.nx_units", "NX_TIME"), - pytest.param("NXarpes.ENTRY.DATA.nx_optional", False), - pytest.param("NXentry.nx_category", "base"), + pytest.param(f"{__REPLACEMENT_FOR_NX}entry.nx_kind", "group"), + pytest.param(f"{__REPLACEMENT_FOR_NX}detector.real_time__field", "*"), + pytest.param(f"{__REPLACEMENT_FOR_NX}entry.DATA.nx_optional", True), + pytest.param(f"{__REPLACEMENT_FOR_NX}entry.DATA.nx_kind", "group"), + pytest.param(f"{__REPLACEMENT_FOR_NX}entry.DATA.nx_optional", True), + pytest.param( + f"{__REPLACEMENT_FOR_NX}detector.real_time__field.name", "real_time__field" + ), + pytest.param( + f"{__REPLACEMENT_FOR_NX}detector.real_time__field.nx_type", "NX_NUMBER" + ), + pytest.param( + f"{__REPLACEMENT_FOR_NX}detector.real_time__field.nx_units", "NX_TIME" + ), + pytest.param(f"{__REPLACEMENT_FOR_NX}arpes.ENTRY.DATA.nx_optional", False), + pytest.param(f"{__REPLACEMENT_FOR_NX}entry.nx_category", "base"), pytest.param( - "NXdispersion_table.refractive_index__field.nx_type", "NX_COMPLEX" + f"{__REPLACEMENT_FOR_NX}dispersion_table.refractive_index__field.nx_type", + "NX_COMPLEX", ), pytest.param( - "NXdispersive_material.ENTRY.dispersion_x." + f"{__REPLACEMENT_FOR_NX}dispersive_material.ENTRY.dispersion_x." "DISPERSION_TABLE.refractive_index__field.nx_type", "NX_COMPLEX", ), - pytest.param("NXapm.nx_category", "application"), + pytest.param(f"{__REPLACEMENT_FOR_NX}apm.nx_category", "application"), ], ) def test_assert_nexus_metainfo(path: str, value: Any): """ Test the existence of nexus metainfo - pytest.param('NXdispersive_material.inner_section_definitions[0].sub_sections[1].sub_section.inner_section_definitions[0].quantities[4].more["nx_type"] - - - """ current = nexus_metainfo_package for name in path.split("."): @@ -120,12 +125,13 @@ def test_nexus_example(): example_data = "src/pynxtools/data/201805_WSe2_arpes.nxs" NexusParser().parse(example_data, archive, get_logger(__name__)) - assert archive.nexus.NXarpes.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( + arpes_obj = getattr(archive.nexus, f"{__REPLACEMENT_FOR_NX}arpes") + + assert arpes_obj.ENTRY[0].SAMPLE[0].pressure__field == ureg.Quantity( "3.27e-10*millibar" ) - instrument = archive.nexus.NXarpes.ENTRY[0].INSTRUMENT[0] - + instrument = arpes_obj.ENTRY[0].INSTRUMENT[0] assert instrument.nx_name == "instrument" assert instrument.monochromator.energy__field == ureg.Quantity( "36.49699020385742*electron_volt" @@ -139,8 +145,7 @@ def test_nexus_example(): assert instrument.SOURCE[0].mode__field is None # wrong inherited ENUM for extended field - 'Free Electron Laser' assert instrument.SOURCE[0].type__field is None - - data = archive.nexus.NXarpes.ENTRY[0].DATA[0] + data = arpes_obj.ENTRY[0].DATA[0] assert len(data.AXISNAME__field) == 3 # there is still a bug in the variadic name resolution, so skip these # assert data.delays__field is not None