Skip to content

Commit

Permalink
get units from h5 file first
Browse files Browse the repository at this point in the history
  • Loading branch information
rettigl committed Feb 22, 2024
1 parent 8bc597f commit 54e12b5
Showing 1 changed file with 65 additions and 59 deletions.
124 changes: 65 additions & 59 deletions pynxtools_mpes/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
import errno
import os
from functools import reduce
from typing import Any, Tuple
from typing import Any, Tuple, Union

import h5py
import numpy as np
import xarray as xr
import yaml
from pynxtools.dataconverter.readers.base.reader import BaseReader
Expand All @@ -48,90 +49,75 @@
"ky": "1/A",
}

def recursive_parse_metadata(
node: Union[h5py.Group, h5py.Dataset],
) -> dict:
"""Recurses through an hdf5 file, and parse it into a dictionary.
Args:
node (Union[h5py.Group, h5py.Dataset]): hdf5 group or dataset to parse into
dictionary.
def res_to_xarray(res, bin_names, bin_axes, metadata=None):
"""creates a BinnedArray (xarray subclass) out of the given np.array
Parameters:
res: np.array
nd array of binned data
bin_names (list): list of names of the binned axes
bin_axes (list): list of np.arrays with the values of the axes
Returns:
ba: BinnedArray (xarray)
an xarray-like container with binned data, axis, and all
available metadata
dict: Dictionary of elements in the hdf5 path contained in node
"""
dims = bin_names
coords = {}
for name, vals in zip(bin_names, bin_axes):
coords[name] = vals

xres = xr.DataArray(res, dims=dims, coords=coords)
if isinstance(node, h5py.Group):
dictionary = {}
for key, value in node.items():
dictionary[key] = recursive_parse_metadata(value)

for name in bin_names:
else:
entry = node[...]
try:
xres[name].attrs["unit"] = DEFAULT_UNITS[name]
except KeyError:
pass

xres.attrs["units"] = "counts"
xres.attrs["long_name"] = "photoelectron counts"
dictionary = entry.item()
if isinstance(dictionary, (bytes, bytearray)):
dictionary = dictionary.decode()
except ValueError:
dictionary = entry

if metadata is not None:
xres.attrs["metadata"] = metadata
return dictionary

return xres

def h5_to_xarray(faddr: str, mode: str = "r") -> xr.DataArray:
"""Read xarray data from formatted hdf5 file
def h5_to_xarray(faddr, mode="r"):
"""Rear xarray data from formatted hdf5 file
Args:
faddr (str): complete file name (including path)
mode (str): hdf5 read/write mode
mode (str, optional): hdf5 read/write mode. Defaults to "r".
Raises:
ValueError: Raised if data or axes are not found in the file.
Returns:
xarray (xarray.DataArray): output xarra data
xr.DataArray: output xarra data
"""
with h5py.File(faddr, mode) as h5_file:
# Reading data array
try:
data = h5_file["binned"]["BinnedData"]
except KeyError:
print("Wrong Data Format, data not found")
raise
data = np.asarray(h5_file["binned"]["BinnedData"])
except KeyError as exc:
raise ValueError(
f"Wrong Data Format, the BinnedData were not found. The error was{exc}.",
) from exc

# Reading the axes
axes = []
bin_axes = []
bin_names = []

try:
for axis in h5_file["axes"]:
axes.append(h5_file["axes"][axis])
bin_axes.append(h5_file["axes"][axis])
bin_names.append(h5_file["axes"][axis].attrs["name"])
except KeyError:
print("Wrong Data Format, axes not found")
raise
except KeyError as exc:
raise ValueError(
f"Wrong Data Format, the axes were not found. The error was {exc}",
) from exc

# load metadata
metadata = None
if "metadata" in h5_file:

def recursive_parse_metadata(node):
if isinstance(node, h5py.Group):
dictionary = {}
for key, value in node.items():
dictionary[key] = recursive_parse_metadata(value)

else:
dictionary = node[...]
try:
dictionary = dictionary.item()
if isinstance(dictionary, (bytes, bytearray)):
dictionary = dictionary.decode()
except ValueError:
pass

return dictionary

metadata = recursive_parse_metadata(h5_file["metadata"])

# Segment to change Vset to V in lens voltages
if "file" in metadata.keys():
for k in list(metadata["file"]):
Expand All @@ -140,7 +126,27 @@ def recursive_parse_metadata(node):
metadata["file"][key] = metadata["file"][k]
del metadata["file"][k]

xarray = res_to_xarray(data, bin_names, axes, metadata)
coords = {}
for name, vals in zip(bin_names, bin_axes):
coords[name] = vals

xarray = xr.DataArray(data, dims=bin_names, coords=coords)

for axis in range(len(bin_axes)):
try:
xarray[bin_names[axis]].attrs["unit"] = h5_file["axes"][f"ax{axis}"].attrs["unit"]
except (KeyError, TypeError):
xarray[bin_names[axis]].attrs["unit"] = DEFAULT_UNITS[bin_names[axis]]
try:
xarray.attrs["units"] = h5_file["binned"]["BinnedData"].attrs["units"]
xarray.attrs["long_name"] = h5_file["binned"]["BinnedData"].attrs["long_name"]
except (KeyError, TypeError):
xarray.attrs["units"] = "counts"
xarray.attrs["long_name"] = "photoelectron counts"

if metadata is not None:
xarray.attrs["metadata"] = metadata

return xarray


Expand Down

0 comments on commit 54e12b5

Please sign in to comment.