Skip to content

Commit

Permalink
Started the refactoring to discretize always all point cloud data whi…
Browse files Browse the repository at this point in the history
…ch are not collected on a square grid that is smaller than the maximum possible extent supported by h5web, tested with use case 207_2081.edaxh5 resulting ROI map is a square likely due to improper handling of HexGrid, next steps: i) fix this bug for 207_2081, ii) replace xmap in ebsd map twod by discretized grid, iii) test with examples from all other tech partners, iv) run against all datasets
  • Loading branch information
atomprobe-tc committed Dec 6, 2023
1 parent 71a6a56 commit 97fcaa1
Show file tree
Hide file tree
Showing 11 changed files with 391 additions and 71 deletions.
3 changes: 3 additions & 0 deletions pynxtools/dataconverter/readers/em/examples/ebsd_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@
# is recoverable when there is no common agreement about the phases used and their
# exact atomic configuration

# Controlled vocabulary for the type of scan-point grid on which EBSD data
# were collected. Subparsers normalize the tech-partner-specific grid-type
# strings (e.g. "HexGrid"/"SqrGrid" for EDAX, "isometric" for Bruker) to one
# of these values and store it under self.tmp[ckey]["grid_type"].
HEXAGONAL_GRID = "hexagonal_grid"
SQUARE_GRID = "square_grid"


FreeTextToUniquePhase = {"Actinolite": "Actinolite",
"al": "Al",
Expand Down
61 changes: 47 additions & 14 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
read_strings_from_dataset
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
ASSUME_PHASE_NAME_TO_SPACE_GROUP
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
from pynxtools.dataconverter.readers.em.utils.get_scan_points import \
get_scan_point_coords


class HdfFiveEdaxApexReader(HdfFiveBaseParser):
Expand Down Expand Up @@ -106,7 +108,6 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if f"{self.prfx}/EBSD/ANG/DATA/DATA" not in fp:
raise ValueError(f"Unable to parse {self.prfx}/EBSD/ANG/DATA/DATA !")

grid_type = None
# for a regular tiling of R^2 with perfect hexagons
n_pts = 0
# their vertical center of mass distance is smaller than the horizontal
Expand All @@ -118,10 +119,14 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if f"{self.prfx}/Sample/{req_field}" not in fp:
raise ValueError(f"Unable to parse {self.prfx}/Sample/{req_field} !")

self.tmp[ckey]["dimensionality"] = 2
grid_type = read_strings_from_dataset(fp[f"{self.prfx}/Sample/Grid Type"][()])
if grid_type not in ["HexGrid", "SqrGrid"]:
raise ValueError(f"Grid Type {grid_type} is currently not supported !")
self.tmp[ckey]["grid_type"] = grid_type
if grid_type == "HexGrid":
self.tmp[ckey]["grid_type"] = HEXAGONAL_GRID
elif grid_type == "SqrGrid":
self.tmp[ckey]["grid_type"] = SQUARE_GRID
else:
raise ValueError(f"Unable to parse {self.prfx}/Sample/Grid Type !")
self.tmp[ckey]["s_x"] = fp[f"{self.prfx}/Sample/Step X"][0]
self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron?
self.tmp[ckey]["n_x"] = fp[f"{self.prfx}/Sample/Number Of Columns"][0]
Expand Down Expand Up @@ -226,12 +231,40 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# TODO::currently assuming s_x and s_y are already the correct center of mass
# distances for hexagonal or square tiling of R^2
# self.tmp[ckey]["grid_type"] in ["HexGrid", "SqrGrid"]:
self.tmp[ckey]["scan_point_x"] = np.asarray(
np.linspace(0, self.tmp[ckey]["n_x"] - 1,
num=self.tmp[ckey]["n_x"],
endpoint=True) * self.tmp[ckey]["s_x"], np.float32)

self.tmp[ckey]["scan_point_y"] = np.asarray(
np.linspace(0, self.tmp[ckey]["n_y"] - 1,
num=self.tmp[ckey]["n_y"],
endpoint=True) * self.tmp[ckey]["s_y"], np.float32)
# if just SQUARE_GRID there is no point to explicitly compute the scan_point
# coordinates here (for every subparser) especially not when the respective
# quantity from the tech partner is just a pixel index i.e. zeroth, first px ...
# however, ideally the tech partners would use the scan_point fields to report
# calibrated absolute scan point positions in the local reference frame of the
# sample surface, in which case these could indeed be not just scaled positions
# having the correct x and y spacing but eventually even the absolute coordinate
# where the scan was performed on the sample surface whereby one could conclude
# more precisely where the scanned area was located, in practice though this precision
# is usually not needed because scientists assume that the ROI is representative for
# the material which they typically never scan (time, interest, costs, instrument
# availability) completely!
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: {self.tmp[ckey]['grid_type']}: check carefully the " \
f"correct interpretation of scan_point coords!")
# the case of EDAX APEX shows the key problem with implicit assumptions
# edaxh5 file not necessarily store the scan_point_{dim} positions
# therefore the following code is deprecated as the axes coordinates anyway
# have to be recomputed based on whether results are rediscretized on a coarser
# grid or not !
# mind also that the code below anyway would give only the NeXus dim axis but
# not the array of pairs of x, y coordinates for each scan point
# TODO::also keep in mind that the order in which the scan points are stored
# i.e. which index on self.tmp[ckey]["euler"] belongs to which scan point
# depends not only on the scan grid but also the flight plan i.e. how the grid
# gets visited
# only because of the fact that in most cases people seem to accept that
# scanning snake like first a line along +x and then +y meandering over the
# scan area from the top left corner to the bottom right corner is JUST an
# assumption for a random or dynamically adaptive scan strategy the scan positions
# have to be reported anyway, TODO::tech partners should be convinced to export
# scaled and calibrated scan positions as they are not necessarily redundant information
# that can be stripped to improve performance of their commercial product, I mean
# we talk typically <5k pattern per second demanding to store 5k * 2 * 8B, indeed
# this is the non-harmonized content one is facing in the field of EBSD despite
# almost two decades of commercialization of the technique now
get_scan_point_coords(self.tmp[ckey])
11 changes: 10 additions & 1 deletion pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
ASSUME_PHASE_NAME_TO_SPACE_GROUP
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID


class HdfFiveBrukerEspritReader(HdfFiveBaseParser):
Expand Down Expand Up @@ -107,6 +107,12 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if f"{grp_name}" not in fp:
raise ValueError(f"Unable to parse {grp_name} !")

self.tmp[ckey]["dimensionality"] = 2 # TODO::QUBE can also yield 3D datasets
if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"]) == "isometric":
self.tmp[ckey]["grid_type"] = SQUARE_GRID
else:
raise ValueError(f"Unable to parse {grp_name}/Grid Type !")

req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"]
for req_field in req_fields:
if f"{grp_name}/{req_field}" not in fp:
Expand Down Expand Up @@ -221,6 +227,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# there is X SAMPLE and Y SAMPLE but these are not defined somewhere instead
# here adding x and y assuming that we scan first lines along positive x and then
# moving downwards along +y
# TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
self.tmp[ckey]["scan_point_x"] \
= np.asarray(np.tile(np.linspace(0.,
self.tmp[ckey]["n_x"] - 1.,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
ASSUME_PHASE_NAME_TO_SPACE_GROUP
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID

# DREAM3D implements essentially a data analysis workflow with individual steps
# in the DREAM3D jargon each step is referred to as a filter, filters have well-defined
Expand Down Expand Up @@ -312,6 +312,10 @@ def parse_and_normalize_ebsd_header(self, ckey: str):
spc = h5r[f"{self.path_registry['group_geometry']}" \
f"/_SIMPL_GEOMETRY/SPACING"][:].flatten()
idx = 0

# TODO::is it a correct assumption that DREAM3D regrids onto square voxels?
self.tmp[ckey]["dimensionality"] = 3
self.tmp[ckey]["grid_type"] = SQUARE_GRID
for dim in ["x", "y", "z"]:
self.tmp[ckey][f"n_{dim}"] = dims[idx]
self.tmp[ckey][f"s_{dim}"] = spc[idx]
Expand Down Expand Up @@ -388,6 +392,8 @@ def parse_and_normalize_ebsd_data(self, ckey: str):
# in effect, the phase_id == 0 rightly so marks position indexed with the null-model

# normalize pixel coordinates to physical positions even though the origin can still dangle somewhere
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
for dim in ["x", "y", "z"]:
self.tmp[ckey][f"scan_point_{dim}"] \
= np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1,
Expand Down
25 changes: 12 additions & 13 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
ASSUME_PHASE_NAME_TO_SPACE_GROUP
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID


class HdfFiveCommunityReader(HdfFiveBaseParser):
Expand Down Expand Up @@ -108,6 +108,12 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if f"{grp_name}" not in fp:
raise ValueError(f"Unable to parse {grp_name} !")

self.tmp[ckey]["dimensionality"] = 2
if read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()]) == "isometric":
self.tmp[ckey]["grid_type"] = SQUARE_GRID
else:
raise ValueError(f"Unable to parse {grp_name}/Grid Type !")

req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"]
for req_field in req_fields:
if f"{grp_name}/{req_field}" not in fp:
Expand Down Expand Up @@ -223,7 +229,10 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# X and Y
# there exist X SAMPLE and Y SAMPLE which give indeed calibrated coordinates
# relative to the sample coordinate system, ignore this for now an
# and TOD::just calibrate on image dimension
# and TODO::just calibrate on image dimension
# TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
self.tmp[ckey]["scan_point_x"] \
= np.asarray(np.tile(np.linspace(0.,
self.tmp[ckey]["n_x"] - 1.,
Expand All @@ -236,17 +245,7 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
num=self.tmp[ckey]["n_y"],
endpoint=True) * self.tmp[ckey]["s_y"],
self.tmp[ckey]["n_x"]), np.float32)

# if np.shape(fp[f"{grp_name}/X SAMPLE"][:])[0] == n_pts:
# self.tmp[ckey]["scan_point_x"] \
# = np.asarray(fp[f"{grp_name}/X SAMPLE"][:], np.float32)
# else:
# raise ValueError(f"{grp_name}/X SAMPLE has unexpected shape !")
# if np.shape(fp[f"{grp_name}/Y SAMPLE"][:])[0] == n_pts:
# self.tmp[ckey]["scan_point_y"] \
# = np.asarray(fp[f"{grp_name}/Y SAMPLE"], np.float32)
# else:
# raise ValueError(f"{grp_name}/Y SAMPLE has unexpected shape !")
# X SAMPLE and Y SAMPLE seem to be something different!

# Band Contrast is not stored in Bruker but Radon Quality or MAD
# but this is s.th. different as it is the mean angular deviation between
Expand Down
21 changes: 12 additions & 9 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \
read_strings_from_dataset, read_first_scalar, format_euler_parameterization
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
ASSUME_PHASE_NAME_TO_SPACE_GROUP
ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID


class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser):
Expand Down Expand Up @@ -110,17 +110,20 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
if f"{grp_name}" not in fp:
raise ValueError(f"Unable to parse {grp_name} !")

grid_type = None
n_pts = 0
req_fields = ["Grid Type", "Step X", "Step Y", "nColumns", "nRows"]
for req_field in req_fields:
if f"{grp_name}/{req_field}" not in fp:
raise ValueError(f"Unable to parse {grp_name}/{req_field} !")

self.tmp[ckey]["dimensionality"] = 2
grid_type = read_strings_from_dataset(fp[f"{grp_name}/Grid Type"][()])
if grid_type not in ["HexGrid", "SqrGrid"]:
raise ValueError(f"Grid Type {grid_type} is currently not supported !")
self.tmp[ckey]["grid_type"] = grid_type
if grid_type == "HexGrid":
self.tmp[ckey]["grid_type"] = HEXAGONAL_GRID
elif grid_type == "SqrGrid":
self.tmp[ckey]["grid_type"] = SQUARE_GRID
else:
raise ValueError(f"Unable to parse {grp_name}/Grid Type !")
self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"])
self.tmp[ckey]["s_unit"] = "um" # "µm" # TODO::always micron?
self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"])
Expand Down Expand Up @@ -248,17 +251,17 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
# as the step size has already been accounted for by the tech partner when writing!
if self.version["schema_version"] in ["OIM Analysis 8.5.1002 x64 [07-17-20]"]:
print(f"{self.version['schema_version']}, tech partner accounted for calibration")
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
self.tmp[ckey]["scan_point_x"] \
= np.asarray(fp[f"{grp_name}/X Position"][:], np.float32)
self.tmp[ckey]["scan_point_y"] \
= np.asarray(fp[f"{grp_name}/Y Position"][:], np.float32)
else:
print(f"{self.version['schema_version']}, parser has to do the calibration")
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
self.tmp[ckey]["scan_point_x"] = np.asarray(
fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32)
self.tmp[ckey]["scan_point_y"] = np.asarray(
fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32)
print(f"xmin {np.min(self.tmp[ckey]['scan_point_x'])}," \
f"xmax {np.max(self.tmp[ckey]['scan_point_x'])}," \
f"ymin {np.min(self.tmp[ckey]['scan_point_y'])}," \
f"ymax {np.max(self.tmp[ckey]['scan_point_y'])}")
8 changes: 8 additions & 0 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
from pynxtools.dataconverter.readers.em.subparsers.hfive_base import HdfFiveBaseParser
from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
read_strings_from_dataset, format_euler_parameterization
from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
HEXAGONAL_GRID, SQUARE_GRID


class HdfFiveOxfordReader(HdfFiveBaseParser):
Expand Down Expand Up @@ -118,6 +120,10 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str):
if f"{grp_name}" not in fp:
raise ValueError(f"Unable to parse {grp_name} !")

# TODO::check if Oxford always uses SquareGrid like assumed here
self.tmp[ckey]["dimensionality"] = 2
self.tmp[ckey]["grid_type"] = SQUARE_GRID

req_fields = ["X Cells", "Y Cells", "X Step", "Y Step"]
for req_field in req_fields:
if f"{grp_name}/{req_field}" not in fp:
Expand Down Expand Up @@ -231,6 +237,8 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
# expected is order on x is first all possible x values while y == 0
# followed by as many copies of this linear sequence for each y increment
# no action needed Oxford reports already the pixel coordinate multiplied by step
if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
# X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner)
self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32)
# inconsistency f32 in file although specification states float
Expand Down
Loading

0 comments on commit 97fcaa1

Please sign in to comment.