Implemented xmap mapping, runthrough tests with all datasets in HDF5 family files, bugfixing

markus.kuehbach committed Oct 23, 2023
1 parent 4076096 commit f947f9d
Showing 15 changed files with 178 additions and 61 deletions.
3 changes: 3 additions & 0 deletions pynxtools/dataconverter/readers/em/concepts/README.md
@@ -0,0 +1,3 @@
# Context

Mapping pieces of information from source concepts onto NeXus concepts.
4 changes: 4 additions & 0 deletions pynxtools/dataconverter/readers/em/examples/README.md
@@ -0,0 +1,4 @@
# Context

Specific code that is relevant only for the implementation of examples for
this em parser and the NOMAD OASIS research data management system.
3 changes: 3 additions & 0 deletions pynxtools/dataconverter/readers/em/examples/ebsd_database.py
@@ -303,3 +303,6 @@
"ZrO": "Zr;O"}

ProjectIdToCitation = {"Forsterite.ctf.nxs.mtex": {"data": "someurl", "paper": "someurl"}}

+ AssumePhaseNameToSpaceGroup = {"Silver": 225,
+ "Copper": 225}
4 changes: 4 additions & 0 deletions pynxtools/dataconverter/readers/em/geometry/README.md
@@ -0,0 +1,4 @@
# Context

Utility code for handling coordinate systems and geometrical entities
used or normalized by the em parser or its components.
11 changes: 1 addition & 10 deletions pynxtools/dataconverter/readers/em/reader.py
@@ -134,15 +134,6 @@ def read(self,
# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
# continue
# if case.dat_parser_type == "orix":
# orix_parser = NxEmOmOrixEbsdParser(case.dat[0], entry_id)
# # h5oina parser evaluating content and plotting with orix on the fly
# orix_parser.parse(template)
# elif case.dat_parser_type == "mtex":
# mtex_parser = NxEmOmMtexEbsdParser(case.dat[0], entry_id)
# # ebsd parser because concept suggested for MTex by M. Kühbach
# # would include different HDF5 dumps for different MTex classes
# mtex_parser.parse(template)
# elif case.dat_parser_type == "zip":
# zip_parser = NxEmOmZipEbsdParser(case.dat[0], entry_id)
# zip_parser.parse(template)
@@ -167,7 +158,7 @@ def read(self,
if resolved_path != "":
nxs_plt.annotate_default_plot(template, resolved_path)

- debugging = True
+ debugging = False
if debugging is True:
print("Reporting state of template before passing to HDF5 writing...")
for keyword in template.keys():
3 changes: 3 additions & 0 deletions pynxtools/dataconverter/readers/em/subparsers/README.md
@@ -0,0 +1,3 @@
# Context

Specific parsers for metadata and data stored in HDF5 files from technology partners.
8 changes: 4 additions & 4 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
@@ -80,7 +80,7 @@ def check_if_supported(self):
def parse_and_normalize(self):
"""Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
- cache_id = 0
+ cache_id = 1
grp_nms = list(h5r["/"])
for grp_nm in grp_nms:
sub_grp_nms = list(h5r[grp_nm])
@@ -169,7 +169,7 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
# problematic because mapping is not bijective!
# if you know the space group we know laue and point group and symmetry
# but the opposite direction leaves room for ambiguities
space_group = "n/a"
space_group = None
self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group

if len(self.tmp[ckey]["space_group"]) > 0:
@@ -210,8 +210,8 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# check shape of internal virtual chunked number array
r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))])
self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False)
self.tmp[ckey]["phase_id"][i] = dat[i][2]
self.tmp[ckey]["ci"][i] = dat[i][3]
self.tmp[ckey]["ci"][i] = dat[i][2]
self.tmp[ckey]["phase_id"][i] = dat[i][3]

# TODO::convert orientation matrix to Euler angles via om_eu but what are conventions !
# orix based transformation ends up in positive half space and with degrees=False
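The two-line swap above corrects which columns of the chunked record array feed the confidence index and the phase identifier. On the open TODO about Euler conventions, a minimal sketch, assuming Bunge (intrinsic ZXZ) angles and using scipy as a stand-in for the rotation helper the parser imports:

    import numpy as np
    from scipy.spatial.transform import Rotation  # assumption: stand-in library

    om = np.eye(3)  # placeholder for the 3x3 orientation matrix in dat[i][0]
    # whether om maps lab->crystal or crystal->lab (passive vs active rotation)
    # is exactly the convention question the TODO raises; uppercase "ZXZ" means intrinsic
    phi1, Phi, phi2 = Rotation.from_matrix(om).as_euler("ZXZ", degrees=False)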
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -88,7 +88,7 @@ def check_if_supported(self):
def parse_and_normalize(self):
"""Read and normalize away Bruker-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
- cache_id = 0
+ cache_id = 1
grp_names = list(h5r["/"])
for grp_name in grp_names:
if grp_name not in ["Version", "Manufacturer"]:
@@ -159,8 +159,9 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group
else:
raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !")

- # formatting is a nightmare F m#ovl3m for F m 3bar m...
+ # TODO::in some cases a group IT (likely International Tables for Crystallography)
+ # entry is present instead, so parse that rather than the mapping table used above
if len(self.tmp[ckey]["space_group"]) > 0:
self.tmp[ckey]["space_group"].append(space_group)
else:
@@ -89,7 +89,7 @@ def check_if_supported(self):
def parse_and_normalize(self):
"""Read and normalize away community-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
- cache_id = 0
+ cache_id = 1
grp_names = list(h5r["/"])
for grp_name in grp_names:
if grp_name not in ["Version", "Manufacturer"]:
26 changes: 18 additions & 8 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
@@ -37,7 +37,7 @@

from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
- read_strings_from_dataset, format_euler_parameterization
+ read_strings_from_dataset, read_first_scalar, format_euler_parameterization


class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser):
@@ -91,7 +91,7 @@ def check_if_supported(self):
def parse_and_normalize(self):
"""Read and normalize away EDAX-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
- cache_id = 0
+ cache_id = 1
grp_names = list(h5r["/"])
for grp_name in grp_names:
if grp_name not in ["Version", "Manufacturer"]:
@@ -119,11 +119,14 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if grid_type not in ["HexGrid", "SqrGrid"]:
raise ValueError(f"Grid Type {grid_type} is currently not supported !")
self.tmp[ckey]["grid_type"] = grid_type
self.tmp[ckey]["s_x"] = fp[f"{grp_name}/Step X"][()]
self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"])
self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron?
self.tmp[ckey]["n_x"] = fp[f"{grp_name}/nColumns"][()]
self.tmp[ckey]["s_y"] = fp[f"{grp_name}/Step Y"][()]
self.tmp[ckey]["n_y"] = fp[f"{grp_name}/nRows"][()]
self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"])
self.tmp[ckey]["s_y"] = read_first_scalar(fp[f"{grp_name}/Step Y"])
self.tmp[ckey]["n_y"] = read_first_scalar(fp[f"{grp_name}/nRows"])
# TODO::different version store the same concept with the same path name with different shape
# the read_first_scalar is not an optimal solution, in the future all reads from
# HDF5 should check for the shape instead
# TODO::check that all data are consistent

def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
@@ -165,10 +168,11 @@ def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
= np.asarray(angles, np.float32)

# Space Group not stored, only laue group, point group and symmetry
+ # https://doi.org/10.1107/S1600576718012724 is a relevant read here
# problematic because mapping is not bijective!
# if you know the space group we know laue and point group and symmetry
# but the opposite direction leaves room for ambiguities
space_group = "n/a"
space_group = None
self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group

if len(self.tmp[ckey]["space_group"]) > 0:
@@ -207,7 +211,13 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# TODO::seems to be the situation in the example but there is no documentation
self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"])

self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
# given no official EDAX OimAnalysis spec we cannot define for sure if
# phase_id == 0 means just all was indexed with the first/zeroth phase or nothing
# was indexed, TODO::assuming it means all indexed:
if np.all(fp[f"{grp_name}/Phase"][:] == 0):
self.tmp[ckey]["phase_id"] = np.zeros(n_pts, np.int32) + 1
else:
self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"][:], np.int32)
# promoting int8 to int32 no problem
self.tmp[ckey]["ci"] = np.asarray(fp[f"{grp_name}/CI"][:], np.float32)
self.tmp[ckey]["scan_point_x"] = np.asarray(
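The new branch guards a convention mismatch: NeXus reserves phase_id 0 for non-indexed scan points, whereas the example EDAX files appear to use 0 for the first phase. A self-contained sketch of the same normalization, with assumed input data:

    import numpy as np

    raw = np.zeros(4, np.int8)  # assumed vendor phase column where every entry is 0
    if np.all(raw == 0):
        phase_id = np.zeros(raw.size, np.int32) + 1  # treat as all indexed with phase 1
    else:
        phase_id = np.asarray(raw, np.int32)  # keep the vendor identifiers as they are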
6 changes: 3 additions & 3 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
@@ -99,10 +99,10 @@ def check_if_supported(self):
def parse_and_normalize(self):
"""Read and normalize away Oxford-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
- cache_id = 0
+ cache_id = 1
slice_ids = sorted(list(h5r["/"]))
for slice_id in slice_ids:
- if slice_id.isdigit() is True and slice_id == "1":
+ if slice_id.isdigit() is True and slice_id == "1" and f"/{slice_id}/EBSD" in h5r:
# non-negative int, parse only the first slice for now
self.prfx = f"/{slice_id}"
ckey = self.init_named_cache(f"ebsd{cache_id}") # name of the cache to use
@@ -241,6 +241,6 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
# inconsistency f32 in file although specification states float

# Band Contrast, no, H5T_NATIVE_INT32, (size, 1)
self.tmp[ckey]["band_contrast"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32)
self.tmp[ckey]["bc"] = np.asarray(fp[f"{grp_name}/Band Contrast"], np.int32)
# inconsistency uint8 in file although specification states should be int32
# promoting uint8 to int32 no problem
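The added f"/{slice_id}/EBSD" guard relies on h5py's membership test on an open file, which avoids a KeyError for slices without EBSD content; a minimal sketch with an assumed file name:

    import h5py

    with h5py.File("map.h5oina", "r") as h5r:  # assumed file name
        if "/1/EBSD" in h5r:  # True only if the group exists in this file
            ebsd_group = h5r["/1/EBSD"]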
123 changes: 91 additions & 32 deletions pynxtools/dataconverter/readers/em/subparsers/nxs_hfive.py
@@ -121,15 +121,6 @@ def parse(self, template: dict) -> dict:
return template
else: # none or something unsupported
return template

- for key, val in self.cache.items():
- print(f"{key}, type: {type(val)}, shape: {np.shape(val)}")
-
- if self.cache["is_filled"] is True:
- self.process_roi_overview(template)
- self.process_roi_xmap(template)
- self.process_roi_phases(template)
- self.process_roi_inverse_pole_figures(template)
return template

def identify_hfive_type(self):
@@ -157,50 +148,118 @@ def identify_hfive_type(self):
return None

def process_into_template(self, inp: dict, template: dict) -> dict:
- for key, val in inp.items():
- if isinstance(val, dict):
- for ckey, cval in val.items():
- print(f"{ckey}, {cval}")
- else:
- print(f"{key}, {val}")
+ debugging = False
+ if debugging is True:
+ for key, val in inp.items():
+ if isinstance(val, dict):
+ for ckey, cval in val.items():
+ print(f"{ckey}, {cval}")
+ else:
+ print(f"{key}, {val}")

+ self.process_roi_overview(inp, template)
+ self.process_roi_ebsd_maps(inp, template)
return template

+ def process_roi_overview(self, inp: dict, template: dict) -> dict:
+ for ckey in inp.keys():
+ if ckey.startswith("ebsd"):
+ self.process_roi_overview_ebsd_based(
+ inp[ckey], ckey.replace("ebsd", ""), template)
+ break # only one roi for now
+ return template
# super().process_ebsd_cache(self.tmp, template)
# return template

- def process_roi_overview(inp: dict, template: dict) -> dict:
+ def process_roi_overview_ebsd_based(self,
+ inp: dict,
+ roi_id: str,
+ template: dict) -> dict:
print("Parse ROI default plot...")
# prfx = f"/ENTRY[entry{self.entry_id}]/experiment/indexing/region_of_interest/roi{roi_id}"
prfx = f"/roi{roi_id}"
trg = f"{prfx}"
template[f"{trg}/title"] = str("Region-of-interest overview image")
template[f"{trg}/@signal"] = "data"
template[f"{trg}/@axes"] = ["axis_y", "axis_x"]
template[f"{trg}/@AXISNAME_indices[axis_x_indices]"] = np.uint32(0)
template[f"{trg}/@AXISNAME_indices[axis_y_indices]"] = np.uint32(1)
trg = f"{prfx}/data"
contrast_modes = [(None, "n/a"),
("bc", "normalized_band_contrast"),
("ci", "normalized_confidence_index"),
("mad", "normalized_mean_angular_deviation")]
success = False
for contrast_mode in contrast_modes:
if contrast_mode[0] in inp.keys() and success is False:
template[f"{trg}"] = {"compress": np.reshape(np.asarray(np.asarray((inp[contrast_mode[0]] / np.max(inp[contrast_mode[0]]) * 255.), np.uint32), np.uint8), (inp["n_y"], inp["n_x"]), order="C"), "strength": 1}
template[f"{prfx}/descriptor"] = contrast_mode[1]
success = True
if success is False:
raise ValueError(f"{__name__} unable to generate plot for {prfx} !")
# 0 is y while 1 is x !
template[f"{trg}/@long_name"] = "Signal"
template[f"{trg}/@CLASS"] = "IMAGE" # required by H5Web to plot RGB maps
template[f"{trg}/@IMAGE_VERSION"] = "1.2"
template[f"{trg}/@SUBCLASS_VERSION"] = np.int64(15)

trg = f"{prfx}/axis_x"
template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_x"], np.float32), "strength": 1}
template[f"{trg}/@long_name"] = f"Coordinate along x-axis ({inp['s_unit']})"
template[f"{trg}/@units"] = f"{inp['s_unit']}"
trg = f"{prfx}/axis_y"
template[f"{trg}"] = {"compress": np.asarray(inp["scan_point_y"], np.float32), "strength": 1}
template[f"{trg}/@long_name"] = f"Coordinate along y-axis ({inp['s_unit']})"
template[f"{trg}/@units"] = f"{inp['s_unit']}"
return template
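The one-line data assignment above packs scaling, casting, and reshaping into a single expression; an equivalent, simplified sketch (the original's intermediate np.uint32 cast is dropped here, which does not change the result for non-negative signals):

    import numpy as np

    def normalize_to_uint8(signal, n_y, n_x):
        """Scale a contrast signal to 0..255 and reshape it row-major onto the map grid."""
        scaled = np.asarray(signal, np.float64) / np.max(signal) * 255.
        return np.reshape(np.asarray(scaled, np.uint8), (n_y, n_x), order="C")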

+ def process_roi_ebsd_maps(self, inp: dict, template: dict) -> dict:
+ for ckey in inp.keys():
+ if ckey.startswith("ebsd"):
+ roi_identifier = ckey.replace("ebsd", "")
+ self.process_roi_xmap(
+ inp[ckey], roi_identifier, template)
+ # self.process_roi_phases(
+ # inp[ckey], roi_identifier, template)
+ # self.process_roi_inverse_pole_figures(
+ # inp[ckey], roi_identifier, template)
+ break # only one roi for now
+ return template

- def process_roi_xmap(inp: dict) -> dict:
- """Process standardized IPF orientation map using pyxem from normalized orientation data."""
- # for NeXus would like to create a default
- '''
- if np.max(inp["n_x"], inp["n_y"]) < HFIVE_WEB_MAXIMUM_RGB:
+ def process_roi_xmap(self, inp: dict, roi_id: str, template: dict) -> dict:
+ """Process crystal orientation map from normalized orientation data."""
+ # for NeXus to create a default representation of the EBSD map to explore
+ if np.max((inp["n_x"], inp["n_y"])) < HFIVE_WEB_MAXIMUM_RGB:
# can use the map discretization as is
coordinates, _ = create_coordinate_arrays(
(inp["n_x"], inp["n_y"]), (inp["s_x"], inp["s_y"]))
xaxis = coordinates["x"]
yaxis = coordinates["y"]
del coordinates
- # else:
+ else:
+ raise ValueError(f"Downsampling for too large EBSD maps is currently not supported !")
# need to regrid to downsample too large maps
# TODO::implement 1NN-based downsampling approach
# build grid
# tree-based 1NN
# proceed as usual

pyxem_phase_identifier = inp["phase_identifier"] \
- (np.min(inp["phase_identifier"]) - (-1)) # pyxem, non-indexed has to be -1
print(np.unique(pyxem_phase_identifier))
pyxem_phase_identifier = inp["phase_id"] - 1
# inp["phase_id"] - (np.min(inp["phase_id"]) - (-1))
# for pyxem the non-indexed has to be -1 instead of 0 which is what NeXus uses
# -1 always because content of inp["phase_id"] is normalized
# to NeXus NXem_ebsd_crystal_structure concept already!
print(f"Unique pyxem_phase_identifier {np.unique(pyxem_phase_identifier)}")

- self.xmap = CrystalMap(rotations=inp["rotation"],
- x=self.xaxis, y=self.yaxis,
+ self.xmap = CrystalMap(rotations=Rotation.from_euler(euler=inp["euler"],
+ direction='lab2crystal',
+ degrees=False),
+ x=xaxis, y=yaxis,
phase_id=pyxem_phase_identifier,
phase_list=PhaseList(space_groups=inp["space_group"],
structures=inp["phase"]),
prop={"bc": inp["band_contrast"]},
scan_unit=inp["s_unit"])
prop={})
# "bc": inp["band_contrast"]}, scan_unit=inp["s_unit"])
print(self.xmap)
- '''
return template
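For orientation, a minimal, self-contained sketch of building a CrystalMap the way process_roi_xmap does, with placeholder data and an assumed single phase of space group 225; note that orix's create_coordinate_arrays takes the shape as (rows, columns):

    import numpy as np
    from orix.crystal_map import CrystalMap, PhaseList, create_coordinate_arrays
    from orix.quaternion import Rotation

    n_y, n_x = 3, 4  # placeholder map extent
    coordinates, _ = create_coordinate_arrays((n_y, n_x), (0.5, 0.5))  # assumed step sizes
    euler = np.zeros((n_y * n_x, 3), np.float32)  # placeholder Bunge-Euler triplets in rad
    xmap = CrystalMap(rotations=Rotation.from_euler(euler),
                      x=coordinates["x"], y=coordinates["y"],
                      phase_id=np.zeros(n_y * n_x, np.int32),  # every point indexed as phase 0
                      phase_list=PhaseList(space_groups=[225]))
    print(xmap)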

def process_roi_phases(self, template: dict) -> dict:
return template
3 changes: 3 additions & 0 deletions pynxtools/dataconverter/readers/em/utils/README.md
@@ -0,0 +1,3 @@
# Context

Utility code potentially used by multiple (sub)parsers of the em parser.
23 changes: 22 additions & 1 deletion pynxtools/dataconverter/readers/em/utils/hfive_utils.py
@@ -25,8 +25,17 @@
from itertools import groupby


EBSD_MAP_SPACEGROUP = {"F m#ovl3m": 225,
EBSD_MAP_SPACEGROUP = {"P 6#sub3mc": 186,
"P 6/mmm": 191,
"P 6#sub3/mmc": 194,
"F #ovl43m": 216,
"P m#ovl3m": 221,
"F m#ovl3m": 225,
"Fd#ovl3m(*)": 227,
"I m#ovl3m": 229}
# see here for typical examples http://img.chem.ucl.ac.uk/sgp/large/186az1.htm

+ DIRTY_FIX_SPACEGROUP = {}
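A sketch of how these tables might be consulted together (decode_space_group is a hypothetical helper, not part of this commit), falling back to the phase-name assumption from examples/ebsd_database.py when the vendor string is not in the map:

    from pynxtools.dataconverter.readers.em.examples.ebsd_database \
        import AssumePhaseNameToSpaceGroup

    def decode_space_group(spc_grp: str, phase_name: str = "") -> int:
        """Hypothetical: map a vendor Hermann-Mauguin string to a space-group number."""
        if spc_grp in EBSD_MAP_SPACEGROUP:
            return EBSD_MAP_SPACEGROUP[spc_grp]
        if phase_name in AssumePhaseNameToSpaceGroup:
            return AssumePhaseNameToSpaceGroup[phase_name]
        raise ValueError(f"Unable to decode improperly formatted space group {spc_grp} !")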

def format_euler_parameterization(triplet_set):
"""Transform degrees to radiant and apply orientation space symmetry"""
@@ -81,6 +90,18 @@ def read_strings_from_dataset(obj):
# raise ValueError("Neither np.ndarray, nor bytes, nor str !")


+ def read_first_scalar(obj):
+ if hasattr(obj, "shape"):
+ if obj.shape == ():
+ return obj[()]
+ elif obj.shape == (1,):
+ return obj[0]
+ else:
+ raise ValueError(f"Unexpected shape found in {__name__} from object {obj} !")
+ else:
+ raise ValueError(f"Unexpected input passed to {__name__} with object {obj} !")


def all_equal(iterable):
g = groupby(iterable)
return next(g, True) and not next(g, False)
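The groupby idiom collapses consecutive runs of equal items, so an iterable is uniform exactly when at most one run exists:

    all_equal([1, 1, 1])  # True, a single run of equal items
    all_equal("aab")      # False, two runs "aa" and "b"
    all_equal([])         # True, vacuously true for an empty iterable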
