Ellips example (#22)
* updated test.yaml consistent with fairmat-ellips PR

* resorted the indices according to the App. Def.

* silly syntax error fix

* remove unnecessary h5py import

* removed empty line at 219 breaking pycodestyle check

* Minor fixes to wavelength.

* definitions update

* updated defs from fairmat-ellips branch

* updated defs from fairmat-ellips branch, bis

* specify spectrometer/wavelength

* Quick-fix for optional parent check

* Fixed psi/delta

* Fixed optional parent check

* Corrected link of wavelength

* Fixed the changing-dict() issue by generating a key list before modifying the dict().

* Fix stuff remaining after merging master into this branch

* silly typo in using keys()

* In testing '@' in a key, use '\\@' for clearer logic. Thanks Florian for pointing it out!

* parser tests updated to match the new appdefs

* Added a segment to the reader for changing metadata keys for lens voltages and added more paths to config.

* bug fix for python version 3.8

* separated momentum calibrations into kx and ky and NXtransformation fields into x, y translation

* Updates mpes example config file to reflect the appdef changes

* Updates mpes config: LENS_EM instead of LENS

* Fixes tests for mpes

* Updates action to work with lfs

* find definitions in contributed rather than in base_classes

* fix for preference

* fix in filtering multiple definitions

* fix for finding definition files

Co-authored-by: T. Haraszti <[email protected]>
Co-authored-by: Andrea Albino <[email protected]>
Co-authored-by: Sherjeel Shabih <[email protected]>
Co-authored-by: sanbrock <[email protected]>
Co-authored-by: Arora0 <[email protected]>
Co-authored-by: domna <[email protected]>
7 people authored Jun 27, 2022
1 parent c91c1f8 commit dc11aa7
Showing 14 changed files with 488 additions and 311 deletions.
1 change: 1 addition & 0 deletions .gitattributes
@@ -0,0 +1 @@
+*.h5 filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions .github/workflows/pylint.yml
@@ -8,6 +8,8 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - uses: actions/checkout@v2
+        with:
+          lfs: true
       - name: Set up Python 3.7
         uses: actions/setup-python@v2
         with:
@@ -16,6 +18,7 @@ jobs:
         run: |
           git submodule sync --recursive
           git submodule update --init --recursive --jobs=4
+          git lfs pull
           python -m pip install --upgrade pip
           pip install nomad-lab==1.1.1 --extra-index-url https://gitlab.mpcdf.mpg.de/api/v4/projects/2187/packages/pypi/simple
           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
2 changes: 1 addition & 1 deletion nexusparser/definitions
Submodule definitions updated 337 files
8 changes: 5 additions & 3 deletions nexusparser/metainfo/nexus.py
@@ -449,6 +449,8 @@ def compare_dependencies(nxdl1, nxdl2):
     for nxdl_file in sorted(os.listdir(path)):
         if not nxdl_file.endswith('.nxdl.xml'):
             continue
+        if nxdl_file in [el.attrib["name"] + '.nxdl.xml' for el in list_of_nxdl]:
+            continue
         xml_tree = ET.parse(os.path.join(path, nxdl_file))
         xml_node = xml_tree.getroot()
         xml_node.set('nxdl_base', path)
@@ -465,7 +467,7 @@ def compare_dependencies(nxdl1, nxdl2):
             current_index = current_index + 1
         if current_index == len(list_of_nxdl):
             sorted_index = sorted_index + 1
-    # print('\n'.join([nxdl.attrib['name'] for nxdl in list_of_nxdl]))
+    # print('\n'.join([(nxdl.attrib['name'],p.attrib["nxdl_base"]) for nxdl in list_of_nxdl]))
     return list_of_nxdl


@@ -501,8 +503,8 @@ def create_package_from_nxdl_directories(paths) -> Package:


 # separated metainfo package for the nexus base classes, application defs and contributed classes.
-DIRS = [os.path.join(nexus.get_nexus_definitions_path(), 'base_classes')]
-DIRS.append(os.path.join(nexus.get_nexus_definitions_path(), 'contributed_definitions'))
+DIRS = [os.path.join(nexus.get_nexus_definitions_path(), 'contributed_definitions')]
+DIRS.append(os.path.join(nexus.get_nexus_definitions_path(), 'base_classes'))
 DIRS.append(os.path.join(nexus.get_nexus_definitions_path(), 'applications'))
 APPLICATIONS = create_package_from_nxdl_directories(DIRS)
 PACKAGES = (APPLICATIONS,) # , APPLICATIONS, CONTRIBUTED)
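
To make the new preference concrete: directories are scanned in the order given by DIRS, and a definition whose file name was already collected is skipped, so a contributed definition now shadows a base class or application of the same name. A minimal sketch of that scan-and-skip logic (collect_definitions and its argument are invented for illustration, not the repo code):

import os

def collect_definitions(paths):
    """Toy duplicate filter: the first directory that provides a name wins."""
    collected = {}  # definition name -> directory it was first found in
    for path in paths:
        for nxdl_file in sorted(os.listdir(path)):
            if not nxdl_file.endswith('.nxdl.xml'):
                continue
            name = nxdl_file[:-len('.nxdl.xml')]
            if name in collected:
                continue  # an earlier (preferred) directory already provides it
            collected[name] = path
    return collected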
11 changes: 10 additions & 1 deletion nexusparser/tools/dataconverter/helpers.py
@@ -252,6 +252,15 @@ def all_required_children_are_set(optional_parent_path, data, nxdl_root):
     return True


+def is_nxdl_path_a_child(nxdl_path: str, parent: str):
+    """Takes an NXDL path for an element and an NXDL parent and confirms it is a child."""
+    while nxdl_path.rfind("/") != -1:
+        nxdl_path = nxdl_path[0:nxdl_path.rfind("/")]
+        if parent == nxdl_path:
+            return True
+    return False
+
+
 def check_optionality_based_on_parent_group(
         path,
         nxdl_path,
@@ -261,7 +270,7 @@
     """Checks whether field is part of an optional parent and then confirms its optionality"""
     for optional_parent in template["optional_parents"]:
         optional_parent_nxdl = convert_data_converter_dict_to_nxdl_path(optional_parent)
-        if optional_parent_nxdl in nxdl_path \
+        if is_nxdl_path_a_child(nxdl_path, optional_parent_nxdl) \
                 and not all_required_children_are_set(optional_parent, data, nxdl_root):
             raise Exception(f"The data entry, {path}, has an optional parent, "
                             f"{optional_parent}, with required children set. Either"
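
A minimal sketch of why the substring test was replaced: "in" also matches siblings whose names merely share a prefix, while walking the path up one segment at a time only matches true ancestors. The function is copied from the diff above; the example paths are made up.

def is_nxdl_path_a_child(nxdl_path: str, parent: str):
    """Takes an NXDL path for an element and an NXDL parent and confirms it is a child."""
    while nxdl_path.rfind("/") != -1:
        nxdl_path = nxdl_path[0:nxdl_path.rfind("/")]
        if parent == nxdl_path:
            return True
    return False

# A true ancestor is accepted ...
assert is_nxdl_path_a_child("/ENTRY/SAMPLE/name", "/ENTRY/SAMPLE")
# ... while a prefix-sharing sibling is rejected, even though the old
# substring test ("/ENTRY/SAMPLE" in "/ENTRY/SAMPLE_ENV/name") passed it.
assert not is_nxdl_path_a_child("/ENTRY/SAMPLE_ENV/name", "/ENTRY/SAMPLE")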
63 changes: 36 additions & 27 deletions nexusparser/tools/dataconverter/readers/ellips/reader.py
@@ -21,6 +21,7 @@
 import yaml
 import pandas as pd
 import numpy as np
+# import h5py
 from nexusparser.tools.dataconverter.readers.base.reader import BaseReader

 DEFAULT_HEADER = {'sep': '\t', 'skip': 0}
@@ -40,8 +41,9 @@ def load_header(filename, default):
     with open(filename, 'rt', encoding='utf8') as file:
         header = yaml.safe_load(file)

-    for attr in header:
-        if "@" in attr:
+    header_keys = list(header.keys())
+    for attr in header_keys:
+        if "\\@" in attr:
             header[attr.replace("\\@", "@")] = header.pop(attr)

     for key, value in default.items():
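
A minimal sketch of the two fixes in this hunk, with a toy header whose key names are invented: list() snapshots the keys so that popping entries inside the loop cannot raise "dictionary changed size during iteration", and testing for the two-character sequence \@ makes the intent explicit, since the YAML keys escape the attribute marker.

header = {"angle_of_incidence\\@units": "degree", "filename": "test.dat"}  # toy header
for attr in list(header.keys()):
    if "\\@" in attr:
        header[attr.replace("\\@", "@")] = header.pop(attr)
print(header)  # {'filename': 'test.dat', 'angle_of_incidence@units': 'degree'}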
@@ -102,6 +104,10 @@ def populate_header_dict(file_paths):
             raise KeyError("filename is missing from", file_path)
         data_file = os.path.join(os.path.split(file_path)[0], header["filename"])

+        # if the path is not right, try the path provided directly
+        if not os.path.isfile(data_file):
+            data_file = header["filename"]
+
     return header, data_file


@@ -154,6 +160,7 @@ def populate_header_dict_with_datasets(self, file_paths):
         if os.path.isfile(data_file):
             whole_data = load_as_pandas_array(data_file, header)
         else:
+            # this we have tried, we should throw an error...
            whole_data = load_as_pandas_array(header["filename"], header)

         # User defined variables to produce slices of the whole data set
@@ -172,53 +179,55 @@ def populate_header_dict_with_datasets(self, file_paths):
                 block_idx.append(index)

         # array that will be allocated in a HDF5 file
-        my_numpy_array = np.empty([counts[0],
-                                   len(['psi', 'delta']),
-                                   len(unique_angles),
+        # counts[0] = N_wavelents*N_time*N_p1
+        my_numpy_array = np.empty([1,
                                    1,
-                                   1
+                                   len(unique_angles),
+                                   len(['psi', 'delta']),
+                                   counts[0]
                                    ])

         for index, unique_angle in enumerate(unique_angles):
-            my_numpy_array[:,
-                           :,
-                           index,
+            my_numpy_array[0,
                            0,
-                           0] = unique_angle
+                           index,
+                           :,
+                           :] = unique_angle

         for index in range(len(labels["psi"])):
-            my_numpy_array[:,
+            my_numpy_array[0,
                            0,
                            index,
                            0,
-                           0] = whole_data["psi"].to_numpy()[block_idx[index]:block_idx[index + 1]
+                           :] = whole_data["psi"].to_numpy()[block_idx[index]:block_idx[index + 1]
                                                              ].astype("float64")

         for index in range(len(labels["delta"])):
-            my_numpy_array[:,
-                           1,
-                           index,
+            my_numpy_array[0,
                            0,
-                           0] = whole_data["delta"].to_numpy()[block_idx[index]:block_idx[index + 1]
+                           index,
+                           1,
+                           :] = whole_data["delta"].to_numpy()[block_idx[index]:block_idx[index + 1]
                                                                ].astype("float64")

         # measured_data is a required field
         header["measured_data"] = my_numpy_array
-        header["wavelength"] = whole_data["wavelength"].to_numpy()[0:counts[0]].astype("float64")
+        header["spectrometer/wavelength"] = whole_data["wavelength"].to_numpy()[0:counts[0]].astype("float64")
         header["angle_of_incidence"] = unique_angles
         return header, labels["psi"], labels["delta"]

     def read(self, template: dict = None, file_paths: Tuple[str] = None) -> dict:  # pylint: disable=W0221
-        """Reads data from given file and returns a filled template dictionary.
-        A handlings of virtual datasets is implemented:
-        virtual dataset are created inside the final NeXus file.
-        The template entry is filled with a dictionary containing the following keys:
-        - link: the path of the external data file and the path of desired dataset inside it
-        - shape: numpy array slice object (according to array slice notation)
-        """
+        """ Reads data from given file and returns a filled template dictionary.
+        A handlings of virtual datasets is implemented:
+        virtual dataset are created inside the final NeXus file.
+
+        The template entry is filled with a dictionary containing the following keys:
+        - link: the path of the external data file and the path of desired dataset inside it
+        - shape: numpy array slice object (according to array slice notation)
+        """
         if not file_paths:
             raise Exception("No input files were given to Ellipsometry Reader.")
@@ -229,23 +238,23 @@ def read(self, template: dict = None, file_paths: Tuple[str] = None) -> dict: #
         template = populate_template_dict(header, template)

         template["/ENTRY[entry]/plot/wavelength"] = {"link":
-                                                     "/entry/sample/wavelength"
+                                                     "/entry/instrument/spectrometer/wavelength"
                                                      }
         template["/ENTRY[entry]/plot/wavelength/@units"] = "angstrom"

         for index, psi in enumerate(psilist):
             template[f"/ENTRY[entry]/plot/{psi}"] = {"link":
                                                      "/entry/sample/measured_data",
                                                      "shape":
-                                                     np.index_exp[:, 0, index, 0, 0]
+                                                     np.index_exp[0, 0, index, 0, :]
                                                      }
             template[f"/ENTRY[entry]/plot/{psi}/@units"] = "degrees"

         for index, delta in enumerate(deltalist):
             template[f"/ENTRY[entry]/plot/{delta}"] = {"link":
                                                        "/entry/sample/measured_data",
                                                        "shape":
-                                                       np.index_exp[:, 1, index, 0, 0]
+                                                       np.index_exp[0, 0, index, 1, :]
                                                        }
             template[f"/ENTRY[entry]/plot/{delta}/@units"] = "degrees"

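
A minimal sketch of the resorted data layout with toy sizes (the first two singleton axes are reserved by the application definition; their exact meaning is not spelled out in this diff): psi and delta now sit on the fourth axis and the spectral points on the last, so each plotted curve is one np.index_exp slice along the final axis.

import numpy as np

n_angles, n_points = 3, 5                             # toy sizes
measured = np.empty([1, 1, n_angles, 2, n_points])

psi_at_second_angle = np.index_exp[0, 0, 1, 0, :]     # psi is index 0 of axis 3
delta_at_second_angle = np.index_exp[0, 0, 1, 1, :]   # delta is index 1 of axis 3
print(measured[psi_at_second_angle].shape)            # (5,)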
7 changes: 7 additions & 0 deletions nexusparser/tools/dataconverter/readers/mpes/reader.py
@@ -130,6 +130,13 @@ def recursive_parse_metadata(node):
         return dictionary

     metadata = recursive_parse_metadata(h5_file["metadata"])
+    # Segment to change Vset to V in lens voltages
+    if "file" in metadata.keys():
+        for k in list(metadata['file']):
+            if "VSet" in k:
+                key = k[:-3]
+                metadata['file'][key] = metadata['file'][k]
+                del metadata['file'][k]

     xarray = res_to_xarray(data, bin_names, axes, metadata)
     return xarray
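
A minimal sketch of the key rename, with an invented metadata key for illustration: slicing off the last three characters turns a "...VSet" set-point key into the plain "...V" voltage key expected downstream, and list() again guards against mutating the dict while iterating it.

metadata = {"file": {"KTOF:Lens:A:VSet": 200.0, "timestamp": 1656324000}}  # toy metadata
if "file" in metadata.keys():
    for k in list(metadata['file']):
        if "VSet" in k:
            key = k[:-3]                     # "KTOF:Lens:A:VSet" -> "KTOF:Lens:A:V"
            metadata['file'][key] = metadata['file'][k]
            del metadata['file'][k]
print(metadata['file'])  # {'timestamp': 1656324000, 'KTOF:Lens:A:V': 200.0}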
32 changes: 20 additions & 12 deletions nexusparser/tools/nexus.py
@@ -289,6 +289,19 @@ def get_own_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_ty
         return None


+def find_definition_file(bc_name):
+    """find the nxdl file corresponding to the name.
+    Note that it first checks in contributed and goes beyond only if no contributed found"""
+    bc_filename = None
+    for nxdl_folder in ['contributed_definitions', 'base_classes', 'applications']:
+        if os.path.exists(f"{get_nexus_definitions_path()}{os.sep}"
+                          f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"):
+            bc_filename = f"{get_nexus_definitions_path()}{os.sep}" \
+                          f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"
+            break
+    return bc_filename
+
+
 def get_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=None, go_base=True):  # pylint: disable=too-many-arguments
     """Get the NXDL child node corresponding to a specific name
     (e.g. of an HDF5 node,or of a documentation) note that if child is not found in application
@@ -305,11 +318,7 @@ def get_nxdl_child(nxdl_elem, name, class_type=None, hdf_name=None, nexus_type=N
         return None
     if bc_name == "group":  # Check if it is the root element. Then send to NXroot.nxdl.xml
         bc_name = "NXroot"
-    for nxdl_folder in ['base_classes', 'contributed_definitions', 'applications']:
-        if os.path.exists(f"{get_nexus_definitions_path()}{os.sep}"
-                          f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"):
-            bc_filename = f"{get_nexus_definitions_path()}{os.sep}" \
-                          f"{nxdl_folder}{os.sep}{bc_name}.nxdl.xml"
+    bc_filename = find_definition_file(bc_name)
     if not bc_filename:
         raise ValueError('nxdl file not found in definitions folder!')
     bc_obj = ET.parse(bc_filename).getroot()
@@ -637,16 +646,15 @@ def add_base_classes(elist, nx_name=None, elem: ET.Element = None):
     elist is empty, a nxdl file with the name of nx_name or a rather room elem is used if provided"""
     if elist and nx_name is None:
         nx_name = get_nx_class(elist[-1])
-    if elist and nx_name and f"{nx_name}.nxdl.xml" in (e.get('nxdlbase') for e in elist):
-        return
+    # to support recursive defintions, like NXsample in NXsample, the following test is removed
+    # if elist and nx_name and f"{nx_name}.nxdl.xml" in (e.get('nxdlbase') for e in elist):
+    #     return
     if elem is None:
         if not nx_name:
             return
-        nxdl_file_path = f"{nx_name}.nxdl.xml"
-        for root, dirs, files in os.walk(get_nexus_definitions_path()):  # pylint: disable=unused-variable
-            if nxdl_file_path in files:
-                nxdl_file_path = os.path.join(root, nxdl_file_path)
-                break
+        nxdl_file_path = find_definition_file(nx_name)
+        if nxdl_file_path is None:
+            nxdl_file_path = f"{nx_name}.nxdl.xml"
         elem = ET.parse(nxdl_file_path).getroot()
         elem.set('nxdlbase', nxdl_file_path)
     else:
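
The lookup order is the heart of this change: contributed_definitions is searched before base_classes and applications, so a contributed draft (e.g. the fairmat-ellips definitions) shadows a stock definition of the same name. A simplified sketch of that preference, where the explicit path argument stands in for the module's get_nexus_definitions_path() helper:

import os

def find_definition_file(bc_name, definitions_path):
    """Simplified: return the first match in order of decreasing preference."""
    for nxdl_folder in ['contributed_definitions', 'base_classes', 'applications']:
        candidate = os.path.join(definitions_path, nxdl_folder, f"{bc_name}.nxdl.xml")
        if os.path.exists(candidate):
            return candidate
    return None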