Skip to content

Commit

Permalink
Refactoring and implementation for normalization oxford, bruker, apex…
Browse files Browse the repository at this point in the history
… and tested
  • Loading branch information
markus.kuehbach committed Oct 22, 2023
1 parent d5ae63f commit 9c708da
Show file tree
Hide file tree
Showing 11 changed files with 1,152 additions and 713 deletions.
13 changes: 10 additions & 3 deletions pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@

from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser

from pynxtools.dataconverter.readers.em.subparsers.nxs_hfive import NxEmNxsHfiveSubParser

from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver

from pynxtools.dataconverter.readers.em.geometry.convention_mapper \
Expand Down Expand Up @@ -118,11 +120,16 @@ def read(self,
conventions.parse(template)

print("Parse and map pieces of information within files from tech partners...")
sub_parser = "nxs_mtex"
subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0])
subparser.parse(template)
# sub_parser = "nxs_mtex"
# subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0])
# subparser.parse(template)

# add further with resolving cases
# if file_path is an HDF5 will use hfive parser
sub_parser = "nxs_hfive"
subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0])
subparser.parse(template)
exit(1)

# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
Expand Down
237 changes: 202 additions & 35 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,50 +19,217 @@

import numpy as np
import h5py
from pynxtools.dataconverter.readers.em.subparsers.hfive import HdfFiveGenericReader
from itertools import groupby
# import imageio.v3 as iio
from PIL import Image as pil

import diffsims
import orix
from diffpy.structure import Lattice, Structure
from orix import plot
from orix.crystal_map import create_coordinate_arrays, CrystalMap, PhaseList
from orix.quaternion import Rotation
from orix.vector import Vector3d

class HdfFiveEdaxApexReader(HdfFiveGenericReader):
import matplotlib.pyplot as plt

from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
from pynxtools.dataconverter.readers.em.utils.hfive_utils import read_strings_from_dataset


def om_eu(inp):
    """Return the first two elements of *inp* (orientation-matrix -> Euler placeholder)."""
    head = inp[:2]
    return head


class HdfFiveEdaxApexReader(HdfFiveBaseParser):
"""Read APEX edaxh5"""
def __init__(self, file_name: str = ""):
super().__init__(file_name)
# this specialized reader implements reading capabilities for the following formats
    def __init__(self, file_path: str = ""):
        """Initialize parser state and probe whether *file_path* is a supported APEX file."""
        super().__init__(file_path)
        self.prfx = None  # HDF5 path prefix of the group currently being parsed
        self.tmp = {}  # named caches with normalized results, filled by parse_and_normalize
        self.supported_version = {}  # constraints a file has to match, filled by init_support
        self.version = {}  # version metadata found in the concrete file instance
        self.init_support()
        self.supported = False  # set to True by check_if_supported when all constraints match
        self.check_if_supported()

    def init_support(self):
        """Init supported versions, i.e. the constraints a file must match to be accepted."""
        self.supported_version["tech_partner"] = ["EDAX, LLC"]
        self.supported_version["schema_name"] = ["EDAXH5"]
        self.supported_version["schema_version"] = ["2.5.1001.0001"]
        self.supported_version["writer_name"] = ["APEX"]
        self.supported_version["writer_version"] = ["2.5.1001.0001"]
        # NOTE(review): this assignment is overwritten immediately after this
        # call in __init__ (supported = False before check_if_supported runs);
        # possibly a stale diff remnant — confirm against version control
        self.supported = True
# check if instance to process matches any of these constraints
h5r = h5py.File(self.file_name, "r")
# parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists
# so much about interoperability
# but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats"
if "/Manufacturer" in h5r:
self.version["tech_partner"] \
= super().read_strings_from_dataset(h5r["/Manufacturer"][()])
if self.version["tech_partner"] not in self.supported_version["tech_partner"]:
# print(f"{self.version['tech_partner']} is not {self.supported_version['tech_partner']} !")
self.supported = False

def check_if_supported(self):
"""Check if instance matches all constraints to qualify as supported H5OINA"""
self.supported = True # try to falsify
with h5py.File(self.file_path, "r") as h5r:
# parse Company and PRODUCT_VERSION attribute values from the first group below / but these are not scalar but single value lists
# so much about interoperability
# but hehe for the APEX example from Sebastian and Sabine there is again no Company but PRODUCT_VERSION, 2 files, 2 "formats"
grp_names = list(h5r["/"])
if len(grp_names) == 1:
if read_strings_from_dataset(h5r[grp_names[0]].attrs["Company"][0]) \
not in self.supported_version["tech_partner"]:
self.supported = False
if read_strings_from_dataset(h5r[grp_names[0]].attrs["PRODUCT_VERSION"][0]) \
not in self.supported_version["schema_version"]:
self.supported = False
if self.supported is True:
self.version = self.supported_version.copy()

def parse_and_normalize(self):
"""Read and normalize away EDAX/APEX-specific formatting with an equivalent in NXem."""
with h5py.File(f"{self.file_path}", "r") as h5r:
cache_id = 0
grp_nms = list(h5r["/"])
for grp_nm in grp_nms:
sub_grp_nms = list(h5r[grp_nm])
for sub_grp_nm in sub_grp_nms:
sub_sub_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}"])
for sub_sub_grp_nm in sub_sub_grp_nms:
if sub_sub_grp_nm.startswith("Area"):
area_grp_nms = list(h5r[f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}"])
for area_grp_nm in area_grp_nms:
if area_grp_nm.startswith("OIM Map"):
self.prfx = f"/{grp_nm}/{sub_grp_nm}/{sub_sub_grp_nm}/{area_grp_nm}"
print(f"Parsing {self.prfx}")
ckey = self.init_named_cache(f"ebsd{cache_id}")
self.parse_and_normalize_group_ebsd_header(h5r, ckey)
self.parse_and_normalize_group_ebsd_phases(h5r, ckey)
self.parse_and_normalize_group_ebsd_data(h5r, ckey)
cache_id += 1

def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
# no official documentation yet from EDAX/APEX, deeply nested, chunking, virtual ds
if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp:
raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !")

grid_type = None
# for a regular tiling of R^2 with perfect hexagons
n_pts = 0
# their vertical center of mass distance is smaller than the horizontal
# center of mass distance (x cols, y rows)
req_fields = ["Grid Type",
"Step X", "Step Y",
"Number Of Rows", "Number Of Columns"]
for req_field in req_fields:
if f"{self.prfx}/Sample/{req_field}" not in fp:
raise ValueError(f"Unable to parse {self.prfx}/Sample/{req_field} !")

grid_type = read_strings_from_dataset(fp[f"{self.prfx}/Sample/Grid Type"][()])
if grid_type != "HexGrid":
raise ValueError(f"Grid Type {grid_type} is currently not supported !")
self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0]
self.tmp[ckey]["s_unit"] = "µm" # TODO::always micron?
self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0]
self.tmp[ckey]["s_y"] = fp[f"{self.prfx}/Sample/Step Y"][0]
self.tmp[ckey]["n_y"] = fp[f"{self.prfx}/Sample/Number Of Rows"][0]

    def parse_and_normalize_group_ebsd_phases(self, fp, ckey: str):
        """Parse per-phase metadata (name, lattice constants) into self.tmp[ckey]."""
        grp_name = f"{self.prfx}/EBSD/ANG/HEADER/Phase"
        # Phases, contains a subgroup for each phase where the name
        # of each subgroup is the index of the phase starting at 1.
        if f"{grp_name}" in fp:
            phase_ids = sorted(list(fp[f"{grp_name}"]), key=int)
            self.tmp[ckey]["phase"] = []
            self.tmp[ckey]["space_group"] = []
            self.tmp[ckey]["phases"] = {}
            for phase_id in phase_ids:
                if phase_id.isdigit() is True:
                    self.tmp[ckey]["phases"][int(phase_id)] = {}
                    sub_grp_name = f"{grp_name}/{phase_id}"
                    # Name
                    if f"{sub_grp_name}/Material Name" in fp:
                        phase_name = read_strings_from_dataset(fp[f"{sub_grp_name}/Material Name"][0])
                        self.tmp[ckey]["phases"][int(phase_id)]["phase_name"] = phase_name
                    else:
                        raise ValueError(f"Unable to parse {sub_grp_name}/Material Name !")

                    # Reference not available only Info but this can be empty
                    self.tmp[ckey]["phases"][int(phase_id)]["reference"] = "n/a"

                    # all six lattice constants are mandatory
                    req_fields = ["A", "B", "C", "Alpha", "Beta", "Gamma"]
                    for req_field in req_fields:
                        if f"{sub_grp_name}/Lattice Constant {req_field}" not in fp:
                            raise ValueError(f"Unable to parse ../Lattice Constant {req_field} !")
                    a_b_c = [fp[f"{sub_grp_name}/Lattice Constant A"][0],
                             fp[f"{sub_grp_name}/Lattice Constant B"][0],
                             fp[f"{sub_grp_name}/Lattice Constant C"][0]]
                    angles = [fp[f"{sub_grp_name}/Lattice Constant Alpha"][0],
                              fp[f"{sub_grp_name}/Lattice Constant Beta"][0],
                              fp[f"{sub_grp_name}/Lattice Constant Gamma"][0]]
                    # NOTE(review): * 0.1 presumably converts angstrom to nm — confirm units in APEX
                    self.tmp[ckey]["phases"][int(phase_id)]["a_b_c"] \
                        = np.asarray(a_b_c, np.float32) * 0.1
                    self.tmp[ckey]["phases"][int(phase_id)]["alpha_beta_gamma"] \
                        = np.asarray(angles, np.float32)

                    # Space Group not stored, only laue group, point group and symmetry
                    # problematic because mapping is not bijective!
                    # if you know the space group we know laue and point group and symmetry
                    # but the opposite direction leaves room for ambiguities
                    space_group = "n/a"
                    self.tmp[ckey]["phases"][int(phase_id)]["space_group"] = space_group

                    if len(self.tmp[ckey]["space_group"]) > 0:
                        self.tmp[ckey]["space_group"].append(space_group)
                    else:
                        self.tmp[ckey]["space_group"] = [space_group]

                    if len(self.tmp[ckey]["phase"]) > 0:
                        self.tmp[ckey]["phase"].append(
                            Structure(title=phase_name, atoms=None,
                                      lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
                                                      angles[0], angles[1], angles[2])))
                    else:
                        self.tmp[ckey]["phase"] \
                            = [Structure(title=phase_name, atoms=None,
                                         lattice=Lattice(a_b_c[0], a_b_c[1], a_b_c[2],
                                                         angles[0], angles[1], angles[2]))]
        else:
            self.supported = False
            # NOTE(review): the lines below down to the final raise reference
            # `h5r`, which is not in scope in this method; they look like stale
            # remnants of an older reader version (diff artifact) — confirm
            # against version control before relying on this branch.
            if "/Version" in h5r:
                self.version["schema_version"] \
                    = super().read_strings_from_dataset(h5r["/Version"][()])
                if self.version["schema_version"] not in self.supported_version["schema_version"]:
                    # print(f"{self.version['schema_version']} is not any of {self.supported_version['schema_version']} !")
                    self.supported = False
            raise ValueError(f"Unable to parse {grp_name} !")

    def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
        """Parse per-scan-point orientation, phase id, and confidence index arrays."""
        grp_name = f"{self.prfx}/EBSD/ANG/DATA/DATA"
        # expected number of scan points, from the header parsed beforehand
        n_pts = self.tmp[ckey]["n_x"] * self.tmp[ckey]["n_y"]
        if f"{grp_name}" in fp:
            if np.shape(fp[f"{grp_name}"]) != (n_pts,) and n_pts > 0:
                raise ValueError(f"Unexpected shape of {grp_name} !")

            dat = fp[f"{grp_name}"]
            self.tmp[ckey]["euler"] = np.zeros((n_pts, 3), np.float32)
            # index of phase, 0 if not indexed
            # # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
            self.tmp[ckey]["phase_id"] = np.zeros((n_pts,), np.int32)
            self.tmp[ckey]["ci"] = np.zeros((n_pts,), np.float32)

            for i in np.arange(0, n_pts):
                # check shape of internal virtual chunked number array
                # NOTE(review): assumes dat[i][0] holds 9 values forming a 3x3
                # orientation matrix — confirm against APEX record layout
                r = Rotation.from_matrix([np.reshape(dat[i][0], (3, 3))])
                self.tmp[ckey]["euler"][i, :] = r.to_euler(degrees=False)
                self.tmp[ckey]["phase_id"][i] = dat[i][2]
                self.tmp[ckey]["ci"][i] = dat[i][3]

            # TODO::convert orientation matrix to Euler angles via om_eu but what are conventions !
            # orix based transformation ends up in positive half space and with degrees=False
            # as radians but the from_matrix command above might miss one rotation

            # inconsistency f32 in file although specification states float
            # Rotation.from_euler(euler=fp[f"{grp_name}/Euler"],
            #                     direction='lab2crystal',
            #                     degrees=is_degrees)

            # compute explicit hexagon grid cells center of mass pixel positions
            # TODO::currently assuming HexGrid
            self.tmp[ckey]["scan_point_x"] = np.asarray(
                np.linspace(0, self.tmp[ckey]["n_x"] - 1,
                            num=self.tmp[ckey]["n_x"],
                            endpoint=True) * self.tmp[ckey]["s_x"] + 0., np.float32)

            self.tmp[ckey]["scan_point_y"] = np.asarray(
                np.linspace(0, self.tmp[ckey]["n_y"] - 1,
                            num=self.tmp[ckey]["n_y"],
                            endpoint=True) * self.tmp[ckey]["s_y"] + 0., np.float32)
        else:
            self.supported = False
            # NOTE(review): the lines below down to the final raise reference
            # `h5r`/`self.file_name` version bookkeeping that does not belong in
            # this method; they look like stale remnants of an older reader
            # version (diff artifact) — confirm against version control.
            h5r.close()

            if self.supported is True:
                # print(f"Reading {self.file_name} is supported")
                self.version["schema_name"] = self.supported_version["schema_name"]
                self.version["writer_name"] = self.supported_version["writer_name"]
                self.version["writer_version"] = self.supported_version["writer_version"]
                # print(f"{self.version['schema_name']}, {self.supported_version['schema_version']}, {self.supported_version['writer_name']}, {self.supported_version['writer_version']}")
            # else:
            #     print(f"Reading {self.file_name} is not supported!")
            raise ValueError(f"Unable to parse {grp_name} !")
Loading

0 comments on commit 9c708da

Please sign in to comment.