diff --git a/pynxtools_mpes/igor.py b/pynxtools_mpes/igor.py
new file mode 100644
index 0000000..c7b2b18
--- /dev/null
+++ b/pynxtools_mpes/igor.py
@@ -0,0 +1,214 @@
+"""
+Parser for the igor binarywave files from the FHI Phoibos detector
+"""
+
+import re
+from typing import Any, Dict, List
+
+import numpy as np
+from igor2 import binarywave
+from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader
+from pynxtools.dataconverter.readers.utils import parse_yml
+
+
+def parse_note(bnote: bytes) -> Dict[str, Any]:
+    """
+    Parses the note field of the igor binarywave file.
+    It assumes that the note field contains key-value pairs of the
+    form 'key=value' separated by newlines.
+
+    Only the first '=' of a line separates key and value, so values may
+    contain spaces and '='. Lines without '=' or with an empty key are skipped.
+
+    Args:
+        bnote (bytes): The bytes of the binarywave note field.
+
+    Returns:
+        Dict[str, Any]: The dictionary of the parsed note field.
+    """
+    notes: Dict[str, Any] = {}
+    # splitlines() treats "\r", "\n" and "\r\n" alike as line separators.
+    for line in bnote.decode("utf-8").splitlines():
+        key, sep, val = line.partition("=")
+        key = key.strip()
+        if sep and key:
+            notes[key] = val.strip()
+
+    return notes
+
+
+def sort_key(filename: str, pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$") -> int:
+    r"""
+    Returns the sort key based on the second group in the regex pattern.
+    Default is to match filenames of the form ..._<scan>_<number>.ibw.
+    Where <number> is used as the sort key.
+
+    Args:
+        filename (str): The filename to return a sort key for.
+        pattern (str, optional):
+            The sort key pattern. Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".
+
+    Raises:
+        ValueError: If no match in the filename is found.
+
+    Returns:
+        int: The sort key.
+    """
+    groups = re.search(pattern, filename)
+    if groups is not None:
+        return int(groups.group(2))
+    raise ValueError(
+        "Invalid filename: Expected file of the form ..._<scan>_<number>.ibw."
+    )
+
+
+def find_scan_sets(
+    filenames: List[str], pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$"
+) -> Dict[int, Any]:
+    r"""
+    Returns a dict of scan sets where the key is the scan number
+    and the value is a list of filenames.
+    Default is to match filenames of the form ..._<scan>_<number>.ibw.
+    Where <number> is used as the sort key and <scan> is used to indicate
+    the scan number. Filenames not matching the pattern are skipped.
+
+    Args:
+        filenames (List[str]): The filenames to sort into scan sets.
+        pattern (str, optional):
+            The pattern to search for scan groups.
+            The first regex group is used as a scan number.
+            Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".
+
+    Returns:
+        Dict[int, Any]: A dict of scan sets.
+    """
+    scan_sets: Dict[int, Any] = {}
+    for fn in filenames:
+        groups = re.search(pattern, fn)
+        if groups is not None:
+            scan_sets.setdefault(int(groups.group(1)), []).append(fn)
+
+    # Sort each scan set once after grouping instead of after every append.
+    for scan_files in scan_sets.values():
+        scan_files.sort(key=lambda name: sort_key(name, pattern))
+    return scan_sets
+
+
+def axis_from(ibw_data: Dict[str, Any], dim: int) -> np.ndarray:
+    """
+    Returns the axis values for a given dimension from the wave header.
+
+    Args:
+        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
+        dim (int): The dimension to return the axis for.
+
+    Returns:
+        np.ndarray: The axis values.
+    """
+    wave_header = ibw_data["wave"]["wave_header"]
+    return (
+        wave_header["sfA"][dim] * np.arange(wave_header["nDim"][dim])
+        + wave_header["sfB"][dim]
+    )
+
+
+def axis_units_from(ibw_data: Dict[str, Any], dim: int) -> str:
+    """
+    Returns the unit for a given dimension from the wave header.
+
+    Args:
+        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
+        dim (int): The dimension to return the unit for.
+
+    Returns:
+        str: The axis units
+    """
+    unit_arr = ibw_data["wave"]["wave_header"]["dimUnits"][dim]
+    return "".join(elem.decode("utf-8") for elem in unit_arr)
+
+
+class IgorReader(MultiFormatReader):
+    """Reader for FHI specific igor binarywave files"""
+
+    supported_nxdls = ["NXmpes", "NXmpes_arpes"]
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        self.ibw_files = []
+        self.eln_data = None
+        self.ibw_data = {}
+        self.ibw_attrs = {}
+        self.scan_nos = []
+
+        self.extensions = {
+            ".yml": self.handle_eln_file,
+            ".yaml": self.handle_eln_file,
+            ".ibw": self.collect_ibw_file,
+        }
+
+    def handle_eln_file(self, file_path: str) -> Dict[str, Any]:
+        """Parses the eln file with parse_yml and stores the result."""
+        self.eln_data = parse_yml(file_path)
+        return {}
+
+    def get_eln_data(self, path: str) -> Any:
+        """Returns data from the given eln path."""
+        if self.eln_data is None:
+            return None
+
+        return self.eln_data.get(path)
+
+    def get_data(self, path: str) -> Any:
+        """Returns the ibw data stored for the callbacks' entry name and path."""
+        return self.ibw_data.get(f"{self.callbacks.entry_name}/{path}")
+
+    def get_attr(self, path: str) -> Any:
+        """Returns the note value for path from the last ibw file loaded."""
+        return self.ibw_attrs.get(path)
+
+    def get_entry_names(self) -> List[str]:
+        """Returns one entry name per scan number collected in post_process."""
+        return [f"entry{scan_no}" for scan_no in self.scan_nos]
+
+    def post_process(self) -> None:
+        """Loads the collected ibw files and fills ibw_data for each scan."""
+        for scan_no, files in find_scan_sets(self.ibw_files).items():
+            self.scan_nos.append(scan_no)
+            waves = []
+            beta = []
+            theta = []
+            for file in files:
+                ibw = binarywave.load(file)
+                self.ibw_attrs = parse_note(ibw["wave"]["note"])
+                beta.append(float(self.ibw_attrs["Beta"]))
+                theta.append(float(self.ibw_attrs["Theta"]))
+                waves.append(ibw["wave"]["wData"])
+
+            # The axes are taken from the header of the last file of the scan.
+            data_entry = f"entry{scan_no}/data"
+            self.ibw_data[f"entry{scan_no}/theta"] = theta
+            self.ibw_data[
+                f"entry{scan_no}/process/energy_referencing/reference_peak"
+            ] = "vacuum level"
+            self.ibw_data[f"{data_entry}/@axes"] = ["theta", "beta", "energy"]
+            self.ibw_data[f"{data_entry}/beta"] = beta
+            self.ibw_data[f"{data_entry}/beta/@units"] = "degrees"
+            self.ibw_data[f"{data_entry}/energy"] = axis_from(ibw, 0)
+            self.ibw_data[f"{data_entry}/energy/@units"] = axis_units_from(ibw, 0)
+            self.ibw_data[f"{data_entry}/theta"] = axis_from(ibw, 1)
+            self.ibw_data[f"{data_entry}/theta/@units"] = axis_units_from(ibw, 1)
+            self.ibw_data[f"{data_entry}/@signal"] = "data"
+            # For 2D waves: (file, energy, theta) becomes (theta, file, energy).
+            self.ibw_data[f"{data_entry}/data"] = (
+                np.array(waves).swapaxes(1, 2).swapaxes(0, 1)
+            )
+            self.ibw_data[f"{data_entry}/data/@units"] = "counts"
+            self.ibw_data[f"{data_entry}/energy/@type"] = "kinetic"
+
+    def collect_ibw_file(self, file_path: str) -> Dict[str, Any]:
+        """Remembers the ibw file path for loading in post_process."""
+        self.ibw_files.append(file_path)
+        return {}
+
+
+READER = IgorReader
diff --git a/pyproject.toml b/pyproject.toml
index a1740d3..c303232 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,9 +41,13 @@ dev = [
     "types-pyyaml",
     "pip-tools",
 ]
+igor = [
+    "igor2",
+]
 
 [project.entry-points."pynxtools.reader"]
 mpes = "pynxtools_mpes.reader:MPESReader"
+igor_fhi = "pynxtools_mpes.igor:IgorReader"
 
 [tool.setuptools]
 packages = ["pynxtools_mpes"]
diff --git a/scripts/regenerate_examples.sh b/scripts/regenerate_examples.sh
index 83bfbc1..7a012f0 100755
--- a/scripts/regenerate_examples.sh
+++ b/scripts/regenerate_examples.sh
@@ -13,7 +13,7 @@ function update_mpes_eln_example {
 }
 
 project_dir=$(dirname $(dirname $(realpath $0)))
-cd $project_dir/tests/data
+cd $project_dir/tests/data/mpes
 
 update_mpes_example
 update_mpes_eln_example
\ No newline at end of file
diff --git a/tests/data/igor/config_file.json b/tests/data/igor/config_file.json
new file mode 100644
index 0000000..4127e2d
--- /dev/null
+++ b/tests/data/igor/config_file.json
@@ -0,0 +1,15 @@
+{
+  "/@default": "entry1",
+  "/ENTRY[entry]/PROCESS_MPES[process]/energy_referencing/reference_peak": "@data:process/energy_referencing/reference_peak",
+  "/ENTRY[entry]/data/@axes": "@data:data/@axes",
+  "/ENTRY[entry]/data/@signal": "@data:data/@signal",
+  "/ENTRY[entry]/data/beta": "@data:data/beta",
+  "/ENTRY[entry]/data/beta/@units": "@data:data/beta/@units",
+  "/ENTRY[entry]/data/data": "@data:data/data",
+  "/ENTRY[entry]/data/data/@units": "@data:data/data/@units",
+  "/ENTRY[entry]/data/energy": "@data:data/energy",
+  "/ENTRY[entry]/data/energy/@units": "@data:data/energy/@units",
+  "/ENTRY[entry]/data/energy/@type": "@data:data/energy/@type",
+  "/ENTRY[entry]/data/theta": "@data:data/theta",
+  "/ENTRY[entry]/data/theta/@units": "@data:data/theta/@units"
+}
\ No newline at end of file
diff --git a/tests/data/config_file.json b/tests/data/mpes/config_file.json
similarity index 97%
rename from tests/data/config_file.json
rename to tests/data/mpes/config_file.json
index a8bfd3a..c56d92c 100644
--- a/tests/data/config_file.json
+++ b/tests/data/mpes/config_file.json
@@ -7,7 +7,9 @@
   "/ENTRY/experiment_institution": "Fritz Haber Institute - Max Planck Society",
   "/ENTRY/experiment_facility": "Time Resolved ARPES",
   "/ENTRY/experiment_laboratory": "Clean Room 4",
-  "/ENTRY/entry_identifier": "@attrs:metadata/entry_identifier",
+  "/ENTRY/entry_identifier": {
+    "identifier":"@attrs:metadata/entry_identifier"
+  },
   "/ENTRY/end_time": "@attrs:metadata/timing/acquisition_stop",
   "/ENTRY/duration": "@attrs:metadata/timing/acquisition_duration",
   "/ENTRY/duration/@units": "s",
diff --git a/tests/data/eln_data.yaml b/tests/data/mpes/eln_data.yaml
similarity index 100%
rename from tests/data/eln_data.yaml
rename to tests/data/mpes/eln_data.yaml
diff --git a/tests/data/example.nxs b/tests/data/mpes/example.nxs
similarity index 99%
rename from tests/data/example.nxs
rename to tests/data/mpes/example.nxs
index cbf0321..b2f20be 100644
Binary files a/tests/data/example.nxs and b/tests/data/mpes/example.nxs differ
diff --git a/tests/data/example_eln.nxs b/tests/data/mpes/example_eln.nxs
similarity index 99%
rename from tests/data/example_eln.nxs
rename to tests/data/mpes/example_eln.nxs
index d7baa11..8883b5b 100644
Binary files a/tests/data/example_eln.nxs and b/tests/data/mpes/example_eln.nxs differ
diff --git a/tests/data/xarray_saved_small_calibration.h5 b/tests/data/mpes/xarray_saved_small_calibration.h5
similarity index 100%
rename from tests/data/xarray_saved_small_calibration.h5
rename to tests/data/mpes/xarray_saved_small_calibration.h5
diff --git a/tests/test_igor.py b/tests/test_igor.py
new file mode 100644
index 0000000..cb3b539
--- /dev/null
+++ b/tests/test_igor.py
@@ -0,0 +1,20 @@
+from glob import glob
+from pathlib import Path
+
+import pytest
+from pynxtools.dataconverter.convert import convert
+
+
+@pytest.mark.skip(reason="Skip because igor data is not in the repo yet")
+def test_igor_reader(tmp_path):
+    """Tests the conversion of igor binarywave files into nexus."""
+    dir_path = Path(__file__).parent / "data" / "igor"
+    convert(
+        input_file=sorted(glob(str(dir_path / "data" / "*.ibw"))),
+        reader="igor_fhi",
+        nxdl="NXmpes",
+        output=str(tmp_path / "igor_test.nxs"),
+        config_file=str(dir_path / "config_file.json"),
+        skip_verify=False,
+        ignore_undocumented=False,
+    )
diff --git a/tests/test_reader.py b/tests/test_reader.py
index e54d589..95c75f5 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -12,7 +12,7 @@ def test_nexus_conversion(caplog, tmp_path):
     Tests the conversion into nexus.
     """
     caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
     test = ReaderTest(
         nxdl="NXmpes",
         reader_name="mpes",
@@ -33,7 +33,7 @@ def test_conversion_w_eln_data(caplog, tmp_path):
     Tests the conversion with additional ELN data
     """
     caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
     test = ReaderTest(
         nxdl="NXmpes",
         reader_name="mpes",