Skip to content

Commit

Permalink
Merge pull request #453 from FAIRmat-NFDI/avoid-name-conflict
Browse files Browse the repository at this point in the history
Avoid name conflict with BaseSection classes
  • Loading branch information
lukaspie authored Oct 18, 2024
2 parents e267676 + ab40557 commit a287b39
Show file tree
Hide file tree
Showing 7 changed files with 218 additions and 120 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Install nomad
if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.12'}}"
run: |
uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git
uv pip install git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@fixes_resolve_variadic_name
- name: Install pynx
run: |
uv pip install ".[dev]"
Expand Down
24 changes: 14 additions & 10 deletions src/pynxtools/nomad/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
import pynxtools.nomad.schema as nexus_schema
from pynxtools.nexus.nexus import HandleNexus
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX
from pynxtools.nomad.utils import __rename_nx_to_nomad as rename_nx_to_nomad
from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad


def _to_group_name(nx_node: ET.Element):
Expand Down Expand Up @@ -93,6 +93,8 @@ def _to_section(
# no need to change section for quantities and attributes
return current

nomad_def_name = rename_nx_for_nomad(nomad_def_name, is_group=True)

# for groups, get the definition from the package
new_def = current.m_def.all_sub_sections[nomad_def_name]

Expand Down Expand Up @@ -218,7 +220,7 @@ def _populate_data(
"setting attribute attempt before creating quantity"
)
current.m_set_quantity_attribute(
metainfo_def, attr_name, attr_value, quantity=quantity
quantity.name, attr_name, attr_value
)
except Exception as e:
self._logger.warning(
Expand Down Expand Up @@ -292,26 +294,26 @@ def _populate_data(
try:
current.m_set(metainfo_def, field)
current.m_set_quantity_attribute(
metainfo_def, "m_nx_data_path", hdf_node.name, quantity=field
data_instance_name, "m_nx_data_path", hdf_node.name
)
current.m_set_quantity_attribute(
metainfo_def, "m_nx_data_file", self.nxs_fname, quantity=field
data_instance_name, "m_nx_data_file", self.nxs_fname
)
if field_stats is not None:
# TODO _add_additional_attributes function has created these nx_data_*
# attributes speculatively already so if the field_stats is None
# this will cause unpopulated attributes in the GUI
current.m_set_quantity_attribute(
metainfo_def, "nx_data_mean", field_stats[0], quantity=field
data_instance_name, "nx_data_mean", field_stats[0]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_var", field_stats[1], quantity=field
data_instance_name, "nx_data_var", field_stats[1]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_min", field_stats[2], quantity=field
data_instance_name, "nx_data_min", field_stats[2]
)
current.m_set_quantity_attribute(
metainfo_def, "nx_data_max", field_stats[3], quantity=field
data_instance_name, "nx_data_max", field_stats[3]
)
except Exception as e:
self._logger.warning(
Expand All @@ -333,7 +335,8 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613
hdf_path: str = hdf_info["hdf_path"]
hdf_node = hdf_info["hdf_node"]
if nx_def is not None:
nx_def = rename_nx_to_nomad(nx_def)
nx_def = rename_nx_for_nomad(nx_def)

if nx_path is None:
return

Expand Down Expand Up @@ -473,7 +476,8 @@ def parse(
child_archives: Dict[str, EntryArchive] = None,
) -> None:
self.archive = archive
self.nx_root = nexus_schema.NeXus()
self.nx_root = nexus_schema.NeXus() # type: ignore # pylint: disable=no-member

self.archive.data = self.nx_root
self._logger = logger if logger else get_logger(__name__)
self._clear_class_refs()
Expand Down
22 changes: 12 additions & 10 deletions src/pynxtools/nomad/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@

from pynxtools import get_definitions_url
from pynxtools.definitions.dev_tools.utils.nxdl_utils import get_nexus_definitions_path
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_to_nomad
from pynxtools.nomad.utils import __REPLACEMENT_FOR_NX, __rename_nx_for_nomad

# __URL_REGEXP from
# https://stackoverflow.com/questions/3809401/what-is-a-good-regular-expression-to-match-a-url
Expand All @@ -83,6 +83,7 @@
r"(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+"
r'(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?«»“”‘’]))'
)

# noinspection HttpUrlsUsage
__XML_NAMESPACES = {"nx": "http://definition.nexusformat.org/nxdl/3.1"}

Expand Down Expand Up @@ -294,8 +295,6 @@ def __to_section(name: str, **kwargs) -> Section:
class nexus definition.
"""

# name = __rename_nx_to_nomad(name)

if name in __section_definitions:
section = __section_definitions[name]
section.more.update(**kwargs)
Expand Down Expand Up @@ -373,7 +372,7 @@ def __create_attributes(xml_node: ET.Element, definition: Union[Section, Quantit
todo: account for more attributes of attribute, e.g., default, minOccurs
"""
for attribute in xml_node.findall("nx:attribute", __XML_NAMESPACES):
name = attribute.get("name") + "__attribute"
name = __rename_nx_for_nomad(attribute.get("name"), is_attribute=True)

nx_enum = __get_enumeration(attribute)
if nx_enum:
Expand Down Expand Up @@ -466,7 +465,8 @@ def __create_field(xml_node: ET.Element, container: Section) -> Quantity:

# name
assert "name" in xml_attrs, "Expecting name to be present"
name = xml_attrs["name"] + "__field"

name = __rename_nx_for_nomad(xml_attrs["name"], is_field=True)

# type
nx_type = xml_attrs.get("type", "NX_CHAR")
Expand Down Expand Up @@ -549,21 +549,23 @@ def __create_group(xml_node: ET.Element, root_section: Section):
xml_attrs = group.attrib

assert "type" in xml_attrs, "Expecting type to be present"
nx_type = __rename_nx_to_nomad(xml_attrs["type"])
nx_type = __rename_nx_for_nomad(xml_attrs["type"])

nx_name = xml_attrs.get("name", nx_type)
group_section = Section(validate=VALIDATE, nx_kind="group", name=nx_name)
section_name = __rename_nx_for_nomad(nx_name, is_group=True)
group_section = Section(validate=VALIDATE, nx_kind="group", name=section_name)

__attach_base_section(group_section, root_section, __to_section(nx_type))
__add_common_properties(group, group_section)

nx_name = xml_attrs.get(
"name", nx_type.replace(__REPLACEMENT_FOR_NX, "").upper()
)
subsection_name = __rename_nx_for_nomad(nx_name, is_group=True)
group_subsection = SubSection(
section_def=group_section,
nx_kind="group",
name=nx_name,
name=subsection_name,
repeats=__if_repeats(nx_name, xml_attrs.get("maxOccurs", "0")),
variable=__if_template(nx_name),
)
Expand Down Expand Up @@ -605,15 +607,15 @@ def __create_class_section(xml_node: ET.Element) -> Section:
nx_type = xml_attrs["type"]
nx_category = xml_attrs["category"]

nx_name = __rename_nx_to_nomad(nx_name)
nx_name = __rename_nx_for_nomad(nx_name)
class_section: Section = __to_section(
nx_name, nx_kind=nx_type, nx_category=nx_category
)

nomad_base_sec_cls = __BASESECTIONS_MAP.get(nx_name, [BaseSection])

if "extends" in xml_attrs:
nx_base_sec = __to_section(__rename_nx_to_nomad(xml_attrs["extends"]))
nx_base_sec = __to_section(__rename_nx_for_nomad(xml_attrs["extends"]))
class_section.base_sections = [nx_base_sec] + [
cls.m_def for cls in nomad_base_sec_cls
]
Expand Down
62 changes: 55 additions & 7 deletions src/pynxtools/nomad/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,64 @@

# String that replaces the leading "NX" of an NXDL name when it is renamed for
# NOMAD; with the empty string the "NX" prefix is simply dropped.
__REPLACEMENT_FOR_NX = ""

# NeXus group names that are not allowed as-is because they are already defined
# as quantities in the BaseSection class.
UNALLOWED_GROUP_NAMES = {"name", "datetime", "lab_id", "description"}

def __rename_nx_to_nomad(name: str) -> Optional[str]:

def __rename_classes_in_nomad(nx_name: str) -> Optional[str]:
    """
    Rename group names that would collide with names reserved by NOMAD.

    Some quantity names are reserved in the BaseSection class (or even higher
    up in the metainfo), so a NeXus group carrying one of these names gets
    '__group' appended to avoid the collision.

    Args:
        nx_name (str): The original group name.

    Returns:
        Optional[str]: The group name with '__group' appended if it is listed
        in UNALLOWED_GROUP_NAMES, otherwise the name unchanged.
    """
    if nx_name in UNALLOWED_GROUP_NAMES:
        return nx_name + "__group"
    return nx_name


def __rename_nx_for_nomad(
    name: str,
    is_group: bool = False,
    is_field: bool = False,
    is_attribute: bool = False,
) -> Optional[str]:
    """
    Rename an NXDL name for compatibility with NOMAD.

    The rules depend on the kind of NeXus concept the name belongs to
    (group, field, or attribute):

    - ``None`` is returned unchanged.
    - 'NXobject' is returned unchanged, regardless of the flags.
    - NX-prefixed names (e.g., NXdata) have the 'NX' prefix replaced by
      __REPLACEMENT_FOR_NX.
    - Group names are passed through __rename_classes_in_nomad(), which
      appends '__group' to names reserved by NOMAD.
    - Fields and attributes get '__field' or '__attribute' appended,
      respectively.

    Only the first flag that is set is applied, in the order
    is_group, is_field, is_attribute.

    Args:
        name (str): The NXDL name.
        is_group (bool): Whether the name represents a group.
        is_field (bool): Whether the name represents a field.
        is_attribute (bool): Whether the name represents an attribute.

    Returns:
        Optional[str]: The renamed NXDL name, or None if no name was given.
    """
    # Honour the documented contract: a missing name yields None instead of a
    # TypeError from the string concatenations below.
    if name is None:
        return None

    if name == "NXobject":
        return name

    if name.startswith("NX"):
        name = __REPLACEMENT_FOR_NX + name[2:]

    if is_group:
        name = __rename_classes_in_nomad(name)
    elif is_field:
        name += "__field"
    elif is_attribute:
        name += "__attribute"

    return name
Binary file added tests/data/nomad/NXlauetof.hdf5
Binary file not shown.
117 changes: 117 additions & 0 deletions tests/nomad/test_metainfo_schema.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""This is a code that performs several tests on nexus tool"""

#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import pytest

try:
from nomad.metainfo import Section
except ImportError:
pytest.skip("nomad not installed", allow_module_level=True)

from typing import Any

from pynxtools.nomad.schema import nexus_metainfo_package
from pynxtools.nomad.utils import __rename_nx_for_nomad as rename_nx_for_nomad


@pytest.mark.parametrize(
    "path,value",
    [
        pytest.param("name", "nexus"),
        pytest.param("NXobject.name", "NXobject"),
        pytest.param(rename_nx_for_nomad("NXentry") + ".nx_kind", "group"),
        pytest.param(rename_nx_for_nomad("NXdetector") + ".real_time__field", "*"),
        pytest.param(rename_nx_for_nomad("NXentry") + ".DATA.nx_optional", True),
        pytest.param(rename_nx_for_nomad("NXentry") + ".DATA.nx_kind", "group"),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.name",
            "real_time__field",
        ),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.nx_type", "NX_NUMBER"
        ),
        pytest.param(
            rename_nx_for_nomad("NXdetector") + ".real_time__field.nx_units", "NX_TIME"
        ),
        pytest.param(rename_nx_for_nomad("NXarpes") + ".ENTRY.DATA.nx_optional", False),
        pytest.param(rename_nx_for_nomad("NXentry") + ".nx_category", "base"),
        pytest.param(
            rename_nx_for_nomad("NXdispersion_table")
            + ".refractive_index__field.nx_type",
            "NX_COMPLEX",
        ),
        pytest.param(
            rename_nx_for_nomad("NXdispersive_material")
            + ".ENTRY.dispersion_x."
            + "DISPERSION_TABLE.refractive_index__field.nx_type",
            "NX_COMPLEX",
        ),
        pytest.param(rename_nx_for_nomad("NXapm") + ".nx_category", "application"),
    ],
)
def test_assert_nexus_metainfo(path: str, value: Any):
    """
    Test the existence of nexus metainfo.

    ``path`` is a dot-separated chain of names that is resolved step by step,
    starting at ``nexus_metainfo_package``. Names ending in ``__field`` are
    looked up among the quantities of the current element; all other names are
    looked up among its section definitions, sub sections, attributes and
    contents. A name that matches none of these is read as a plain Python
    attribute of the current element.

    ``value`` is the expected result: ``"*"`` only requires that the path
    resolves, ``None`` requires that it resolves to nothing, anything else is
    compared for equality.

    Example of a raw metainfo path kept from the original notes:
    pytest.param('NXdispersive_material.inner_section_definitions[0].sub_sections[1].sub_section.inner_section_definitions[0].quantities[4].more["nx_type"]
    """
    current = nexus_metainfo_package
    for name in path.split("."):
        # Collect the candidate children in which to look for ``name``.
        if name.endswith("__field"):
            candidate_lists = [getattr(current, "quantities", None)]
        else:
            candidate_lists = [
                getattr(current, "section_definitions", None),
                getattr(current, "sub_sections", None),
                getattr(current, "attributes", None),
                current.m_contents(),
            ]

        elements: list = []
        for candidates in candidate_lists:
            if candidates:
                elements += candidates

        for content in elements:
            if getattr(content, "name", None) == name:
                current = content  # type: ignore
                # For a sub section, continue the walk in the section it points to.
                if getattr(current, "sub_section", None):
                    current = current.section_definition
                break
        else:
            # No child with that name: treat it as a plain attribute.
            current = getattr(current, name, None)

        if current is None:
            pytest.fail(f"{path} does not exist")

    if value == "*":
        assert current is not None, f"{path} does not exist"
    elif value is None:
        assert current is None, f"{path} does exist"
    else:
        assert current == value, f"{path} has wrong value"

    if isinstance(current, Section):
        assert current.nx_kind is not None
        for base_section in current.all_base_sections:
            assert base_section.nx_kind == current.nx_kind
Loading

0 comments on commit a287b39

Please sign in to comment.