Skip to content

Commit

Permalink
Merge pull request #455 from FAIRmat-NFDI/basiceln_filename
Browse files Browse the repository at this point in the history
careful name generation for new BasicELN archive and population of NXroot
  • Loading branch information
lukaspie authored Oct 31, 2024
2 parents b81041d + 4af0896 commit c91ee6b
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 34 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
- name: Install nomad
if: "${{ matrix.python_version != '3.8' && matrix.python_version != '3.12'}}"
run: |
uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git@Sprint_Nomad_BaseSection
uv pip install nomad-lab@git+https://gitlab.mpcdf.mpg.de/nomad-lab/nomad-FAIR.git
- name: Install pynx
run: |
uv pip install ".[dev]"
Expand Down
13 changes: 9 additions & 4 deletions src/pynxtools/nexus/nexus.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,11 @@ def decode_if_string(
def get_nxdl_entry(hdf_info):
"""Get the nxdl application definition for an HDF5 node"""
entry = hdf_info
if (
"NX_class" in entry["hdf_node"].attrs.keys()
and decode_if_string(entry["hdf_node"].attrs["NX_class"]) == "NXroot"
):
return "NXroot"
while (
isinstance(entry["hdf_node"], h5py.Dataset)
or "NX_class" not in entry["hdf_node"].attrs.keys()
Expand All @@ -97,7 +102,7 @@ def get_nxdl_entry(hdf_info):
nxdef = entry["hdf_node"]["definition"][()]
return nxdef.decode()
except KeyError: # 'NO Definition referenced'
return "NXentry"
return "NXroot"


def get_nx_class_path(hdf_info):
Expand Down Expand Up @@ -398,6 +403,8 @@ def get_inherited_hdf_nodes(
path = hdf_path

for pind in range(len(path)):
if len(path) == 1 and path[0] == "":
return ([""], ["/"], elist)
hdf_info2 = [hdf_path, hdf_node, hdf_class_path]
[
hdf_path,
Expand Down Expand Up @@ -803,9 +810,7 @@ def not_yet_visited(self, root, name):

def full_visit(self, root, hdf_node, name, func):
"""visiting recursivly all children, but avoiding endless cycles"""
# print(name)
if len(name) > 0:
func(name, hdf_node)
func(name, hdf_node)
if isinstance(hdf_node, h5py.Group):
for ch_name, child in hdf_node.items():
full_name = ch_name if len(name) == 0 else name + "/" + ch_name
Expand Down
20 changes: 13 additions & 7 deletions src/pynxtools/nomad/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,15 +159,21 @@ def _collect_class(self, current: MSection):
self._sample_class_refs[class_name].append(current)

def _populate_data(
self, depth: int, nx_path: list, nx_def: str, hdf_node, current: MSection
self, depth: int, nx_path: list, nx_def: str, hdf_node, current: MSection, attr
):
"""
Populate attributes and fields
"""
if depth < len(nx_path):
if attr:
# it is an attribute of either field or group
nx_attr = nx_path[depth]
nx_parent: ET.Element = nx_path[depth - 1]
nx_root = False
if nx_path[0] == "/":
nx_attr = nx_path[1]
nx_parent = nx_attr.getparent()
nx_root = True
else:
nx_attr = nx_path[depth]
nx_parent = nx_path[depth - 1]

if isinstance(nx_attr, str):
if nx_attr != "units":
Expand All @@ -191,7 +197,7 @@ def _populate_data(
current = _to_section(attr_name, nx_def, nx_attr, current)

try:
if nx_parent.tag.endswith("group"):
if nx_root or nx_parent.tag.endswith("group"):
current.m_set_section_attribute(attr_name, attr_value)
else:
parent_html_name = nx_path[-2].get("name")
Expand Down Expand Up @@ -323,7 +329,7 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613
if nx_def is not None:
nx_def = rename_nx_for_nomad(nx_def)

if nx_path is None:
if nx_path is None or nx_path == "/":
return

current: MSection = _to_section(None, nx_def, None, self.nx_root)
Expand All @@ -340,7 +346,7 @@ def __nexus_populate(self, params: dict, attr=None): # pylint: disable=W0613
if nx_node.tag.endswith("group"):
current.m_set_section_attribute("m_nx_data_path", current_hdf_path)
current.m_set_section_attribute("m_nx_data_file", self.nxs_fname)
self._populate_data(depth, nx_path, nx_def, hdf_node, current)
self._populate_data(depth, nx_path, nx_def, hdf_node, current, attr)

def get_sub_element_names(self, elem: MSection):
return elem.m_def.all_aliases.keys()
Expand Down
11 changes: 8 additions & 3 deletions src/pynxtools/nomad/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# limitations under the License.
#

import hashlib
import json
import os
import os.path
Expand Down Expand Up @@ -744,7 +745,9 @@ def __create_package_from_nxdl_directories(nexus_section: Section) -> Package:

for section in sections:
package.section_definitions.append(section)
if section.nx_category == "application":
if section.nx_category == "application" or (
section.nx_category == "base" and section.nx_name == "NXroot"
):
nexus_section.sub_sections.append(
SubSection(section_def=section, name=section.name)
)
Expand Down Expand Up @@ -905,8 +908,10 @@ def get_entry_reference(archive, f_name):
EntityReference.normalize(self, archive, logger)
if not self.reference:
logger.info(f"{self.lab_id} to be created")

f_name = f"{current_cls.__name__}_{self.lab_id}.archive.json"
f_name = re.split("([0-9a-zA-Z.]+)", self.lab_id)[1]
if len(f_name) != len(self.lab_id):
f_name = f_name + hashlib.md5(self.lab_id.encode()).hexdigest()
f_name = f"{current_cls.__name__}_{f_name}.archive.json"
create_Entity(self.lab_id, archive, f_name)
self.reference = get_entry_reference(archive, f_name)
logger.info(f"{self.reference} - referenced directly")
Expand Down
83 changes: 64 additions & 19 deletions src/pynxtools/testing/nexus_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@


from pynxtools.dataconverter.convert import get_reader, transfer_data_into_template
from pynxtools.dataconverter.helpers import get_nxdl_root_and_path
from pynxtools.dataconverter.helpers import (
get_nxdl_root_and_path,
add_default_root_attributes,
)
from pynxtools.dataconverter.validation import validate_dict_against
from pynxtools.dataconverter.writer import Writer
from pynxtools.nexus.nexus import HandleNexus
Expand Down Expand Up @@ -115,6 +118,9 @@ def convert_to_nexus(
)
assert self.caplog.text == ""

add_default_root_attributes(
data=read_data, filename=os.path.basename(self.created_nexus)
)
Writer(read_data, nxdl_file, self.created_nexus).write()

if NOMAD_AVAILABLE:
Expand All @@ -133,24 +139,63 @@ def check_reproducibility_of_nexus(self):
IGNORE_LINES = [
"DEBUG - value: v",
"DEBUG - value: https://github.com/FAIRmat-NFDI/nexus_definitions/blob/",
"DEBUG - ===== GROUP (// [NXroot::]):",
]
ref_log = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path)
gen_log = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path)
with open(gen_log, "r", encoding="utf-8") as gen, open(
ref_log, "r", encoding="utf-8"
) as ref:
gen_lines = gen.readlines()
ref_lines = ref.readlines()
if len(gen_lines) != len(ref_lines):
assert False, "Log files are different"
for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)):
if gen_l != ref_l:
# skip ignored lines (mainly version conflicts)
for ignore_line in IGNORE_LINES:
if gen_l.startswith(ignore_line) and ref_l.startswith(ignore_line):
break
else:
SECTION_IGNORE = {
"ATTRS (//@HDF5_version)": ["DEBUG - value:"],
"ATTRS (//@file_name)": ["DEBUG - value:"],
"ATTRS (//@file_time)": ["DEBUG - value:"],
"ATTRS (//@file_update_time)": ["DEBUG - value:"],
"ATTRS (//@h5py_version)": ["DEBUG - value:"],
}
SECTION_SEPARATOR = "DEBUG - ===== "

def should_skip_line(gen_l: str, ref_l: str, ignore_lines: list[str]) -> bool:
"""Check if both lines start with any ignored prefix."""
return any(
gen_l.startswith(ignore) and ref_l.startswith(ignore)
for ignore in ignore_lines
)

def load_logs(
gen_log_path: str, ref_log_path: str
) -> tuple[list[str], list[str]]:
"""Load log files and return their contents as lists of lines."""
with open(gen_log_path, "r", encoding="utf-8") as gen, open(
ref_log_path, "r", encoding="utf-8"
) as ref:
return gen.readlines(), ref.readlines()

def compare_logs(gen_lines: list[str], ref_lines: list[str]) -> None:
"""Compare log lines, ignoring specific differences."""
if len(gen_lines) != len(ref_lines):
assert False, (
f"Log files are different: mismatched line counts. "
f"Generated file has {len(gen_lines)} lines, "
f"while reference file has {len(ref_lines)} lines."
)

section_ignore_lines = []
section = None
for ind, (gen_l, ref_l) in enumerate(zip(gen_lines, ref_lines)):
if gen_l.startswith(SECTION_SEPARATOR) and ref_l.startswith(
SECTION_SEPARATOR
):
section = gen_l.rsplit(SECTION_SEPARATOR)[-1].strip()
section_ignore_lines = SECTION_IGNORE.get(section, [])

# Compare lines if not in ignore list
if gen_l != ref_l and not should_skip_line(
gen_l, ref_l, IGNORE_LINES + section_ignore_lines
):
assert False, (
f"Log files are different at line {ind}"
f" generated: {gen_l} \n referenced : {ref_l}"
f"Log files are different at line {ind}\n"
f"generated: {gen_l}\nreferenced: {ref_l}"
)

ref_log_path = get_log_file(self.ref_nexus_file, "ref_nexus.log", self.tmp_path)
gen_log_path = get_log_file(self.created_nexus, "gen_nexus.log", self.tmp_path)
gen_lines, ref_lines = load_logs(gen_log_path, ref_log_path)

# Compare logs
compare_logs(gen_lines, ref_lines)
29 changes: 29 additions & 0 deletions tests/data/nexus/Ref_nexus_test.log
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
DEBUG - ===== GROUP (// [NO NXentry found::]): <HDF5 file "201805_WSe2_arpes.nxs" (mode r)>
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== ATTRS (//@HDF5_Version)
DEBUG - value: 1.10.5
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== ATTRS (//@file_name)
DEBUG - value: /home/tommaso/Desktop/NeXus/Test/201805_WSe2_arpes.nxs
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== ATTRS (//@file_time)
DEBUG - value: 2020-06-04T19:19:48.464472
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== ATTRS (//@h5py_version)
DEBUG - value: 2.10.0
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== ATTRS (//@nexusformat_version)
DEBUG - value: 0.5.2
DEBUG - classpath: None
DEBUG - NOT IN SCHEMA
DEBUG -
DEBUG - ===== GROUP (//entry [NXarpes::/NXentry]): <HDF5 group "/entry" (12 members)>
DEBUG - classpath: ['NXentry']
DEBUG - classes:
Expand Down

0 comments on commit c91ee6b

Please sign in to comment.