Adds support for reading igor files #7

Open · wants to merge 18 commits into base: main
207 changes: 207 additions & 0 deletions pynxtools_mpes/igor.py
@@ -0,0 +1,207 @@
"""
Parser for Igor binary wave (.ibw) files from the FHI Phoibos detector.
"""

import re
from typing import Any, Dict, List

import numpy as np
from igor2 import binarywave
from pynxtools.dataconverter.readers.multi.reader import MultiFormatReader
from pynxtools.dataconverter.readers.utils import parse_yml


def parse_note(bnote: bytes) -> Dict[str, Any]:
    """
    Parses the note field of the igor binarywave file.
    It assumes that the note field contains key-value pairs of the
    form 'key=value' separated by newlines.

    Args:
        bnote (bytes): The bytes of the binarywave note field.

    Returns:
        Dict[str, Any]: The dictionary of the parsed note field.
    """
    note = bnote.decode("utf-8").replace("\r", "\n")
    notes = {}
    for line in note.split("\n"):
        split = line.split("=")
        if len(split) == 2:
            key, val = split
            notes[key] = val

    return notes
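
# Illustrative example (hypothetical note contents):
#   parse_note(b"Beta=1.5\rTheta=2.0") -> {"Beta": "1.5", "Theta": "2.0"}
# Lines that do not contain exactly one '=' are skipped.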


def sort_key(filename: str, pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$") -> int:
    r"""
    Returns the sort key based on the second group in the regex pattern.
    Default is to match filenames of the form ..._<scan>_<frame>.ibw,
    where <frame> is used as the sort key.

    Args:
        filename (str): The filename to return a sort key for.
        pattern (str, optional):
            The sort key pattern. Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".

    Raises:
        ValueError: If no match in the filename is found.

    Returns:
        int: The sort key.
    """
    groups = re.search(pattern, filename)
    if groups is not None:
        return int(groups.group(2))
    raise ValueError(
        "Invalid filename: Expected file of the form ..._<scan>_<frame>.ibw."
    )
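
# Illustrative example (hypothetical filename): sort_key("Scan_007_013.ibw") returns 13.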


def find_scan_sets(
    filenames: List[str], pattern: str = r"[^\/_]+_(\d+)_(\d+).ibw$"
) -> Dict[int, Any]:
    r"""
    Returns a dict of scan sets where the key is the scan number
    and the value is a list of filenames.
    Default is to match filenames of the form ..._<scan>_<frame>.ibw,
    where <frame> is used as the sort key and <scan> indicates the scan number.

    Args:
        filenames (List[str]): The filenames to sort into scan sets.
        pattern (str, optional):
            The pattern to search for scan groups.
            The first regex group is used as a scan number.
            Defaults to r"[^\/_]+_(\d+)_(\d+).ibw$".

    Returns:
        Dict[int, Any]: A dict of scan sets.
    """
    scan_sets: Dict[int, Any] = {}
    for fn in filenames:
        groups = re.search(pattern, fn)
        if groups is not None:
            scan = int(groups.group(1))
            if scan not in scan_sets:
                scan_sets[scan] = []
            scan_sets[scan].append(fn)
            scan_sets[scan].sort(key=lambda fn: sort_key(fn, pattern))
    return scan_sets
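
# Illustrative example (hypothetical filenames):
#   find_scan_sets(["Scan_002_001.ibw", "Scan_001_002.ibw", "Scan_001_001.ibw"])
#   -> {2: ["Scan_002_001.ibw"], 1: ["Scan_001_001.ibw", "Scan_001_002.ibw"]}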


def axis_from(ibw_data: Dict[str, Any], dim: int) -> np.ndarray:
    """
    Returns the axis values for a given dimension from the wave header.

    Args:
        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
        dim (int): The dimension to return the axis for.

    Returns:
        np.ndarray: The axis values.
    """
    wave_header = ibw_data["wave"]["wave_header"]
    return (
        wave_header["sfA"][dim] * np.arange(wave_header["nDim"][dim])
        + wave_header["sfB"][dim]
    )
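
# Illustrative example: for a dimension with nDim=5, scale sfA=0.1, and offset
# sfB=20.0 (hypothetical header values), the axis is [20.0, 20.1, 20.2, 20.3, 20.4].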


def axis_units_from(ibw_data: Dict[str, Any], dim: int) -> str:
    """
    Returns the unit for a given dimension from the wave header.

    Args:
        ibw_data (Dict[str, Any]): The ibw data containing the wave_header.
        dim (int): The dimension to return the unit for.

    Returns:
        str: The axis units.
    """
    unit_arr = ibw_data["wave"]["wave_header"]["dimUnits"][dim]

    return "".join(elem.decode("utf-8") for elem in unit_arr)
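
# Illustrative example (hypothetical header contents): if dimUnits for the dimension
# holds the byte entries b"d", b"e", b"g", the returned unit string is "deg".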


class IgorReader(MultiFormatReader):
    """Reader for FHI specific igor binarywave files"""

    supported_nxdls = ["NXmpes", "NXmpes_arpes"]

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.ibw_files = []
        self.eln_data = None
        self.ibw_data = {}
        self.ibw_attrs = {}
        self.scan_nos = []

        self.extensions = {
            ".yml": self.handle_eln_file,
            ".yaml": self.handle_eln_file,
            ".ibw": self.collect_ibw_file,
        }

    def handle_eln_file(self, file_path: str) -> Dict[str, Any]:
        self.eln_data = parse_yml(file_path)
        return {}

    def get_eln_data(self, path: str) -> Any:
        """Returns data from the given eln path."""
        if self.eln_data is None:
            return None

        return self.eln_data.get(path)

    def get_data(self, path: str) -> Any:
        return self.ibw_data.get(f"{self.callbacks.entry_name}/{path}")

    def get_attr(self, path: str) -> Any:
        return self.ibw_attrs.get(path)

    def get_entry_names(self) -> List[str]:
        return [f"entry{scan_no}" for scan_no in self.scan_nos]

    def post_process(self) -> None:
        for scan_no, files in find_scan_sets(self.ibw_files).items():
            self.scan_nos.append(scan_no)
            waves = []
            beta = []
            theta = []
            for file in files:
                ibw = binarywave.load(file)
                self.ibw_attrs = parse_note(ibw["wave"]["note"])
                beta.append(float(self.ibw_attrs["Beta"]))
                theta.append(float(self.ibw_attrs["Theta"]))
                waves.append(ibw["wave"]["wData"])

            data_entry = f"entry{scan_no}/data"
            self.ibw_data[f"entry{scan_no}/theta"] = theta
            self.ibw_data[
                f"entry{scan_no}/process/energy_referencing/reference_peak"
            ] = "vacuum level"
            self.ibw_data[f"{data_entry}/@axes"] = ["theta", "beta", "energy"]
            self.ibw_data[f"{data_entry}/beta"] = beta
            self.ibw_data[f"{data_entry}/beta/@units"] = "degrees"
            self.ibw_data[f"{data_entry}/energy"] = axis_from(ibw, 0)
            self.ibw_data[f"{data_entry}/energy/@units"] = axis_units_from(ibw, 0)
            self.ibw_data[f"{data_entry}/theta"] = axis_from(ibw, 1)
            self.ibw_data[f"{data_entry}/theta/@units"] = axis_units_from(ibw, 1)
            self.ibw_data[f"{data_entry}/@signal"] = "data"
            self.ibw_data[f"{data_entry}/data"] = (
                np.array(waves).swapaxes(1, 2).swapaxes(0, 1)
            )
            self.ibw_data[f"{data_entry}/data/@units"] = "counts"
            self.ibw_data[f"{data_entry}/energy/@type"] = "kinetic"

    def collect_ibw_file(self, file_path: str) -> Dict[str, Any]:
        self.ibw_files.append(file_path)
        return {}


READER = IgorReader
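
For orientation, a minimal usage sketch (not part of this diff): it assumes the package is installed with the igor extra so that the igor_fhi entry point added in pyproject.toml below is registered; the .ibw and YAML file names are hypothetical.

from pynxtools.dataconverter.convert import convert

# Convert one scan set of Igor .ibw files (plus optional ELN data) into a NeXus file,
# mirroring the call signature used in tests/test_igor.py.
convert(
    input_file=["Scan_001_001.ibw", "Scan_001_002.ibw", "eln_data.yaml"],
    reader="igor_fhi",
    nxdl="NXmpes",
    output="igor_example.nxs",
    config_file="config_file.json",
)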
4 changes: 4 additions & 0 deletions pyproject.toml
@@ -41,9 +41,13 @@ dev = [
"types-pyyaml",
"pip-tools",
]
igor = [
"igor2",
]

[project.entry-points."pynxtools.reader"]
mpes = "pynxtools_mpes.reader:MPESReader"
igor_fhi = "pynxtools_mpes.igor:IgorReader"

[tool.setuptools]
packages = ["pynxtools_mpes"]
2 changes: 1 addition & 1 deletion scripts/regenerate_examples.sh
@@ -13,7 +13,7 @@ function update_mpes_eln_example {
}

project_dir=$(dirname $(dirname $(realpath $0)))
-cd $project_dir/tests/data
+cd $project_dir/tests/data/mpes

update_mpes_example
update_mpes_eln_example
15 changes: 15 additions & 0 deletions tests/data/igor/config_file.json
@@ -0,0 +1,15 @@
{
  "/@default": "entry1",
  "/ENTRY[entry]/PROCESS_MPES[process]/energy_referencing/reference_peak": "@data:process/energy_referencing/reference_peak",
  "/ENTRY[entry]/data/@axes": "@data:data/@axes",
  "/ENTRY[entry]/data/@signal": "@data:data/@signal",
  "/ENTRY[entry]/data/beta": "@data:data/beta",
  "/ENTRY[entry]/data/beta/@units": "@data:data/beta/@units",
  "/ENTRY[entry]/data/data": "@data:data/data",
  "/ENTRY[entry]/data/data/@units": "@data:data/data/@units",
  "/ENTRY[entry]/data/energy": "@data:data/energy",
  "/ENTRY[entry]/data/energy/@units": "@data:data/energy/@units",
  "/ENTRY[entry]/data/energy/@type": "@data:data/energy/@type",
  "/ENTRY[entry]/data/theta": "@data:data/theta",
  "/ENTRY[entry]/data/theta/@units": "@data:data/theta/@units"
}
@@ -7,7 +7,9 @@
"/ENTRY/experiment_institution": "Fritz Haber Institute - Max Planck Society",
"/ENTRY/experiment_facility": "Time Resolved ARPES",
"/ENTRY/experiment_laboratory": "Clean Room 4",
"/ENTRY/entry_identifier": "@attrs:metadata/entry_identifier",
"/ENTRY/entry_identifier": {
"identifier":"@attrs:metadata/entry_identifier"
},
"/ENTRY/end_time": "@attrs:metadata/timing/acquisition_stop",
"/ENTRY/duration": "@attrs:metadata/timing/acquisition_duration",
"/ENTRY/duration/@units": "s",
File renamed without changes.
Binary file not shown.
Binary file not shown.
20 changes: 20 additions & 0 deletions tests/test_igor.py
@@ -0,0 +1,20 @@
import os
from glob import glob
from pathlib import Path

import pytest
from pynxtools.dataconverter.convert import convert


@pytest.mark.skip(reason="Skip because igor data is not in the repo yet")
def test_igor_reader(tmp_path):
    dir_path = Path(__file__).parent / "data" / "igor"
    convert(
        input_file=glob(str(dir_path / "data" / "*.ibw")),
        reader="igor_fhi",
        nxdl="NXmpes",
        output=os.path.join(tmp_path / "igor_test.nxs"),
        config_file=str(dir_path / "config_file.json"),
        skip_verify=False,
        ignore_undocumented=False,
    )
4 changes: 2 additions & 2 deletions tests/test_reader.py
@@ -12,7 +12,7 @@ def test_nexus_conversion(caplog, tmp_path):
    Tests the conversion into nexus.
    """
    caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
    test = ReaderTest(
        nxdl="NXmpes",
        reader_name="mpes",
@@ -33,7 +33,7 @@ def test_conversion_w_eln_data(caplog, tmp_path):
    Tests the conversion with additional ELN data
    """
    caplog.clear()
-    dir_path = Path(__file__).parent / "data"
+    dir_path = Path(__file__).parent / "data" / "mpes"
    test = ReaderTest(
        nxdl="NXmpes",
        reader_name="mpes",