Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

get units from h5 file first #4

Merged
merged 2 commits into from
Feb 22, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 65 additions & 59 deletions pynxtools_mpes/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
import errno
import os
from functools import reduce
from typing import Any, Tuple
from typing import Any, Tuple, Union

import h5py
import numpy as np
import xarray as xr
import yaml
from pynxtools.dataconverter.readers.base.reader import BaseReader
Expand All @@ -48,90 +49,75 @@
"ky": "1/A",
}

def recursive_parse_metadata(
node: Union[h5py.Group, h5py.Dataset],
) -> dict:
"""Recurses through an hdf5 file, and parse it into a dictionary.

Args:
node (Union[h5py.Group, h5py.Dataset]): hdf5 group or dataset to parse into
dictionary.

def res_to_xarray(res, bin_names, bin_axes, metadata=None):
"""creates a BinnedArray (xarray subclass) out of the given np.array
Parameters:
res: np.array
nd array of binned data
bin_names (list): list of names of the binned axes
bin_axes (list): list of np.arrays with the values of the axes
Returns:
ba: BinnedArray (xarray)
an xarray-like container with binned data, axis, and all
available metadata
dict: Dictionary of elements in the hdf5 path contained in node
"""
dims = bin_names
coords = {}
for name, vals in zip(bin_names, bin_axes):
coords[name] = vals

xres = xr.DataArray(res, dims=dims, coords=coords)
if isinstance(node, h5py.Group):
dictionary = {}
for key, value in node.items():
dictionary[key] = recursive_parse_metadata(value)

for name in bin_names:
else:
entry = node[...]
try:
xres[name].attrs["unit"] = DEFAULT_UNITS[name]
except KeyError:
pass

xres.attrs["units"] = "counts"
xres.attrs["long_name"] = "photoelectron counts"
dictionary = entry.item()
if isinstance(dictionary, (bytes, bytearray)):
dictionary = dictionary.decode()
except ValueError:
dictionary = entry

if metadata is not None:
xres.attrs["metadata"] = metadata
return dictionary

return xres

def h5_to_xarray(faddr: str, mode: str = "r") -> xr.DataArray:
"""Read xarray data from formatted hdf5 file

def h5_to_xarray(faddr, mode="r"):
"""Rear xarray data from formatted hdf5 file
Args:
faddr (str): complete file name (including path)
mode (str): hdf5 read/write mode
mode (str, optional): hdf5 read/write mode. Defaults to "r".

Raises:
ValueError: Raised if data or axes are not found in the file.

Returns:
xarray (xarray.DataArray): output xarra data
xr.DataArray: output xarra data
"""
with h5py.File(faddr, mode) as h5_file:
# Reading data array
try:
data = h5_file["binned"]["BinnedData"]
except KeyError:
print("Wrong Data Format, data not found")
raise
data = np.asarray(h5_file["binned"]["BinnedData"])
except KeyError as exc:
raise ValueError(
f"Wrong Data Format, the BinnedData was not found.",
) from exc

# Reading the axes
axes = []
bin_axes = []
bin_names = []

try:
for axis in h5_file["axes"]:
axes.append(h5_file["axes"][axis])
bin_axes.append(h5_file["axes"][axis])
bin_names.append(h5_file["axes"][axis].attrs["name"])
except KeyError:
print("Wrong Data Format, axes not found")
raise
except KeyError as exc:
raise ValueError(
f"Wrong Data Format, the axes were not found.",
) from exc

# load metadata
metadata = None
if "metadata" in h5_file:

def recursive_parse_metadata(node):
if isinstance(node, h5py.Group):
dictionary = {}
for key, value in node.items():
dictionary[key] = recursive_parse_metadata(value)

else:
dictionary = node[...]
try:
dictionary = dictionary.item()
if isinstance(dictionary, (bytes, bytearray)):
dictionary = dictionary.decode()
except ValueError:
pass

return dictionary

metadata = recursive_parse_metadata(h5_file["metadata"])

# Segment to change Vset to V in lens voltages
if "file" in metadata.keys():
for k in list(metadata["file"]):
Expand All @@ -140,7 +126,27 @@ def recursive_parse_metadata(node):
metadata["file"][key] = metadata["file"][k]
del metadata["file"][k]

xarray = res_to_xarray(data, bin_names, axes, metadata)
coords = {}
for name, vals in zip(bin_names, bin_axes):
coords[name] = vals

xarray = xr.DataArray(data, dims=bin_names, coords=coords)

for axis in range(len(bin_axes)):
try:
xarray[bin_names[axis]].attrs["unit"] = h5_file["axes"][f"ax{axis}"].attrs["unit"]
except (KeyError, TypeError):
xarray[bin_names[axis]].attrs["unit"] = DEFAULT_UNITS[bin_names[axis]]
try:
xarray.attrs["units"] = h5_file["binned"]["BinnedData"].attrs["units"]
xarray.attrs["long_name"] = h5_file["binned"]["BinnedData"].attrs["long_name"]
except (KeyError, TypeError):
xarray.attrs["units"] = "counts"
xarray.attrs["long_name"] = "photoelectron counts"

if metadata is not None:
xarray.attrs["metadata"] = metadata

return xarray


Expand Down
Loading