Adds support for reading igor files #7

Open · wants to merge 18 commits into base: main
207 changes: 207 additions & 0 deletions pynxtools_mpes/igor.py
@@ -0,0 +1,207 @@
"""
Parser for Igor binary wave (.ibw) files from the FHI Phoibos detector.
"""

import re
from typing import Any, Dict, List

import numpy as np
from igor2 import binarywave
from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader
from pynxtools.dataconverter.readers.utils import parse_yml


def parse_note(bnote: bytes) -> Dict[str, Any]:
    """
    Parses the note field of the igor binarywave file.
    It assumes that the note field contains key-value pairs of the
    form 'key=value' separated by newlines.

    Args:
        bnote (bytes): The bytes of the binarywave note field.

    Returns:
        Dict[str, Any]: The dictionary of the parsed note field.
    """
    note = bnote.decode("utf-8").replace("\r", "\n")
    notes = {}
    for line in note.split("\n"):
        split = line.split("=")
        if len(split) == 2:
            key, val = split
            notes[key] = val

    return notes
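
# Illustrative example (hypothetical note contents):
#   parse_note(b"Beta=1.5\rTheta=2.0") -> {"Beta": "1.5", "Theta": "2.0"}
# Lines that do not contain exactly one '=' are skipped.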


def sort_key(filename: str, pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$") -> int:
    r"""
    Returns the sort key based on the second group in the regex pattern.
    Default is to match filenames of the form ..._<scan>_<frame>.ibw,
    where <frame> is used as the sort key.

    Args:
        filename (str): The filename to return a sort key for.
        pattern (str, optional):
            The sort key pattern. Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".

    Raises:
        ValueError: If no match in the filename is found.

    Returns:
        int: The sort key.
    """
    groups = re.search(pattern, filename)
    if groups is not None:
        return int(groups.group(2))
    raise ValueError(
        "Invalid filename: Expected file of the form ..._<scan>_<frame>.ibw."
    )
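
# Illustrative example (hypothetical filename): sort_key("Scan_007_013.ibw") returns 13.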


def find_scan_sets(
    filenames: List[str], pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$"
) -> Dict[int, Any]:
    r"""
    Returns a dict of scan sets where the key is the scan number
    and the value is a list of filenames.
    Default is to match filenames of the form ..._<scan>_<frame>.ibw,
    where <frame> is used as the sort key and <scan> indicates the scan number.

    Args:
        filenames (List[str]): The filenames to sort into scan sets.
        pattern (str, optional):
            The pattern to search for scan groups.
            The first regex group is used as a scan number.
            Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".

    Returns:
        Dict[int, Any]: A dict of scan sets.
    """
    scan_sets: Dict[int, Any] = {}
    for fn in filenames:
        groups = re.search(pattern, fn)
        if groups is not None:
            scan = int(groups.group(1))
            if scan not in scan_sets:
                scan_sets[scan] = []
            scan_sets[scan].append(fn)
            scan_sets[scan].sort(key=lambda fn: sort_key(fn, pattern))
    return scan_sets
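
# Illustrative example (hypothetical filenames):
#   find_scan_sets(["Scan_002_001.ibw", "Scan_001_002.ibw", "Scan_001_001.ibw"])
#   -> {2: ["Scan_002_001.ibw"], 1: ["Scan_001_001.ibw", "Scan_001_002.ibw"]}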


def axis_from(ibw_data: Dict[str, Any], dim: int) -> np.ndarray:
    """
    Returns the axis values for a given dimension from the wave header.

    Args:
        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
        dim (int): The dimension to return the axis for.

    Returns:
        np.ndarray: The axis values.
    """
    wave_header = ibw_data["wave"]["wave_header"]
    return (
        wave_header["sfA"][dim] * np.arange(wave_header["nDim"][dim])
        + wave_header["sfB"][dim]
    )
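
# Illustrative example: for a dimension with nDim=5, scale sfA=0.1, and offset
# sfB=20.0 (hypothetical header values), the axis is [20.0, 20.1, 20.2, 20.3, 20.4].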


def axis_units_from(ibw_data: Dict[str, Any], dim: int) -> str:
    """
    Returns the unit for a given dimension from the wave header.

    Args:
        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
        dim (int): The dimension to return the unit for.

    Returns:
        str: The axis units.
    """
    unit_arr = ibw_data["wave"]["wave_header"]["dimUnits"][dim]

    return "".join(elem.decode("utf-8") for elem in unit_arr)
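
# Illustrative example (hypothetical header contents): if dimUnits for the dimension
# holds the byte entries b"d", b"e", b"g", the returned unit string is "deg".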


class IgorReader(MultiFormatReader):
    """Reader for FHI specific igor binarywave files"""

    supported_nxdls = ["NXmpes", "NXmpes_arpes"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ibw_files = []
        self.eln_data = None
        self.ibw_data = {}
        self.ibw_attrs = {}
        self.scan_nos = []

        self.extensions = {
            ".yml": self.handle_eln_file,
            ".yaml": self.handle_eln_file,
            ".ibw": self.collect_ibw_file,
        }

    def handle_eln_file(self, file_path: str) -> Dict[str, Any]:
        self.eln_data = parse_yml(file_path)
        return {}

    def get_eln_data(self, path: str) -> Any:
        """Returns data from the given eln path."""
        if self.eln_data is None:
            return None

        return self.eln_data.get(path)

    def get_data(self, path: str) -> Any:
        return self.ibw_data.get(f"{self.callbacks.entry_name}/{path}")

    def get_attr(self, path: str) -> Any:
        return self.ibw_attrs.get(path)

    def get_entry_names(self) -> List[str]:
        return [f"entry{scan_no}" for scan_no in self.scan_nos]

    def post_process(self) -> None:
        for scan_no, files in find_scan_sets(self.ibw_files).items():
            self.scan_nos.append(scan_no)
            waves = []
            beta = []
            theta = []
            for file in files:
                ibw = binarywave.load(file)
                self.ibw_attrs = parse_note(ibw["wave"]["note"])
                beta.append(float(self.ibw_attrs["Beta"]))
                theta.append(float(self.ibw_attrs["Theta"]))
                waves.append(ibw["wave"]["wData"])

            data_entry = f"entry{scan_no}/data"
            self.ibw_data[f"entry{scan_no}/theta"] = theta
            self.ibw_data[
                f"entry{scan_no}/process/energy_referencing/reference_peak"
            ] = "vacuum level"
            self.ibw_data[f"{data_entry}/@axes"] = ["theta", "beta", "energy"]
            self.ibw_data[f"{data_entry}/beta"] = beta
            self.ibw_data[f"{data_entry}/beta/@units"] = "degrees"
            self.ibw_data[f"{data_entry}/energy"] = axis_from(ibw, 0)
            self.ibw_data[f"{data_entry}/energy/@units"] = axis_units_from(ibw, 0)
            self.ibw_data[f"{data_entry}/theta"] = axis_from(ibw, 1)
            self.ibw_data[f"{data_entry}/theta/@units"] = axis_units_from(ibw, 1)
            self.ibw_data[f"{data_entry}/@signal"] = "data"
            self.ibw_data[f"{data_entry}/data"] = (
                np.array(waves).swapaxes(1, 2).swapaxes(0, 1)
            )
            self.ibw_data[f"{data_entry}/data/@units"] = "counts"
            self.ibw_data[f"{data_entry}/energy/@type"] = "kinetic"

    def collect_ibw_file(self, file_path: str) -> Dict[str, Any]:
        self.ibw_files.append(file_path)
        return {}


READER = IgorReader
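
For orientation, a minimal usage sketch (not part of this diff): it assumes the package is installed with the igor extra so that the igor_fhi entry point added in pyproject.toml below is registered; the .ibw and YAML file names are hypothetical.

from pynxtools.dataconverter.convert import convert

# Convert one scan set of Igor .ibw files (plus optional ELN data) into a NeXus file,
# mirroring the call signature used in tests/test_igor.py.
convert(
    input_file=["Scan_001_001.ibw", "Scan_001_002.ibw", "eln_data.yaml"],
    reader="igor_fhi",
    nxdl="NXmpes",
    output="igor_example.nxs",
    config_file="config_file.json",
)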
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -41,9 +41,13 @@ dev = [
"types-pyyaml",
"pip-tools",
]
igor = [
"igor2",
]

[project.entry-points."pynxtools.reader"]
mpes = "pynxtools_mpes.reader:MPESReader"
igor_fhi = "pynxtools_mpes.igor:IgorReader"

[tool.setuptools]
packages = ["pynxtools_mpes"]
2 changes: 1 addition & 1 deletion scripts/regenerate_examples.sh
@@ -13,7 +13,7 @@ function update_mpes_eln_example {
}

project_dir=$(dirname $(dirname $(realpath $0)))
-cd $project_dir/tests/data
+cd $project_dir/tests/data/mpes

update_mpes_example
update_mpes_eln_example
15 changes: 15 additions & 0 deletions tests/data/igor/config_file.json
@@ -0,0 +1,15 @@
{
  "/@default": "entry1",
  "/ENTRY[entry]/PROCESS_MPES[process]/energy_referencing/reference_peak": "@data:process/energy_referencing/reference_peak",
  "/ENTRY[entry]/data/@axes": "@data:data/@axes",
  "/ENTRY[entry]/data/@signal": "@data:data/@signal",
  "/ENTRY[entry]/data/beta": "@data:data/beta",
  "/ENTRY[entry]/data/beta/@units": "@data:data/beta/@units",
  "/ENTRY[entry]/data/data": "@data:data/data",
  "/ENTRY[entry]/data/data/@units": "@data:data/data/@units",
  "/ENTRY[entry]/data/energy": "@data:data/energy",
  "/ENTRY[entry]/data/energy/@units": "@data:data/energy/@units",
  "/ENTRY[entry]/data/energy/@type": "@data:data/energy/@type",
  "/ENTRY[entry]/data/theta": "@data:data/theta",
  "/ENTRY[entry]/data/theta/@units": "@data:data/theta/@units"
}
@@ -7,7 +7,9 @@
"/ENTRY/experiment_institution": "Fritz Haber Institute - Max Planck Society",
"/ENTRY/experiment_facility": "Time Resolved ARPES",
"/ENTRY/experiment_laboratory": "Clean Room 4",
"/ENTRY/entry_identifier": "@attrs:metadata/entry_identifier",
"/ENTRY/entry_identifier": {
"identifier":"@attrs:metadata/entry_identifier"
},
"/ENTRY/end_time": "@attrs:metadata/timing/acquisition_stop",
"/ENTRY/duration": "@attrs:metadata/timing/acquisition_duration",
"/ENTRY/duration/@units": "s",
File renamed without changes.
Binary file not shown.
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test_igor.py
@@ -0,0 +1,20 @@
import os
from glob import glob
from pathlib import Path

import pytest
from pynxtools.dataconverter.convert import convert


@pytest.mark.skip(reason="Skip because igor data is not in the repo yet")
def test_igor_reader(tmp_path):
    dir_path = Path(__file__).parent / "data" / "igor"
    convert(
        input_file=glob(str(dir_path / "data" / "*.ibw")),
        reader="igor_fhi",
        nxdl="NXmpes",
        output=os.path.join(tmp_path / "igor_test.nxs"),
        config_file=str(dir_path / "config_file.json"),
        skip_verify=False,
        ignore_undocumented=False,
    )
4 changes: 2 additions & 2 deletions tests/test_reader.py
@@ -12,7 +12,7 @@ def test_nexus_conversion(caplog, tmp_path):
    Tests the conversion into nexus.
    """
    caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
    test = ReaderTest(
        nxdl="NXmpes",
        reader_name="mpes",
@@ -33,7 +33,7 @@ def test_conversion_w_eln_data(caplog, tmp_path):
    Tests the conversion with additional ELN data
    """
    caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
    test = ReaderTest(
        nxdl="NXmpes",
        reader_name="mpes",