Proliferate tiling and phase_id, scan_point consistency for other parsers
FAIRmat-NFDI · Dec 7, 2023 · 1824163 · 1824163
1 parent c2ce3a0
commit 1824163
Show file tree

Hide file tree

Showing 6 changed files with 65 additions and 32 deletions.
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
@@ -39,7 +39,9 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN
+from pynxtools.dataconverter.readers.em.utils.get_scan_points import \
+    get_scan_point_coords
 
 
 class HdfFiveBrukerEspritReader(HdfFiveBaseParser):
@@ -112,6 +114,9 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             self.tmp[ckey]["grid_type"] = SQUARE_GRID
         else:
             raise ValueError(f"Unable to parse {grp_name}/Grid Type !")
+        # the next two lines encode the typical assumption that is not reported in the tech partner file!
+        self.tmp[ckey]["tiling"] = REGULAR_TILING
+        self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN
 
         req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"]
         for req_field in req_fields:
@@ -230,18 +235,19 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID
         if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
             print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
-        self.tmp[ckey]["scan_point_x"] \
-            = np.asarray(np.tile(np.linspace(0.,
-                                             self.tmp[ckey]["n_x"] - 1.,
-                                             num=self.tmp[ckey]["n_x"],
-                                             endpoint=True) * self.tmp[ckey]["s_x"],
-                                             self.tmp[ckey]["n_y"]), np.float32)
-        self.tmp[ckey]["scan_point_y"] \
-            = np.asarray(np.repeat(np.linspace(0.,
-                                               self.tmp[ckey]["n_y"] - 1.,
-                                               num=self.tmp[ckey]["n_y"],
-                                               endpoint=True) * self.tmp[ckey]["s_y"],
-                                               self.tmp[ckey]["n_x"]), np.float32)
+        # self.tmp[ckey]["scan_point_x"] \
+        #     = np.asarray(np.tile(np.linspace(0.,
+        #                                      self.tmp[ckey]["n_x"] - 1.,
+        #                                      num=self.tmp[ckey]["n_x"],
+        #                                      endpoint=True) * self.tmp[ckey]["s_x"],
+        #                                      self.tmp[ckey]["n_y"]), np.float32)
+        # self.tmp[ckey]["scan_point_y"] \
+        #     = np.asarray(np.repeat(np.linspace(0.,
+        #                                        self.tmp[ckey]["n_y"] - 1.,
+        #                                        num=self.tmp[ckey]["n_y"],
+        #                                        endpoint=True) * self.tmp[ckey]["s_y"],
+        #                                        self.tmp[ckey]["n_x"]), np.float32)
+        get_scan_point_coords(self.tmp[ckey])
 
         # Band Contrast is not stored in Bruker but Radon Quality or MAD
         # but this is s.th. different as it is the mean angular deviation between

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py
@@ -38,7 +38,7 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN
 
 # DREAM3D implements essentially a data analysis workflow with individual steps
 # in the DREAM3D jargon each step is referred to as a filter, filters have well-defined
@@ -316,6 +316,9 @@ def parse_and_normalize_ebsd_header(self, ckey: str):
             # TODO::is it correct an assumption that DREAM3D regrids using square voxel
             self.tmp[ckey]["dimensionality"] = 3
             self.tmp[ckey]["grid_type"] = SQUARE_GRID
+            # the next two lines encode the typical assumption that is not reported in the tech partner file!
+            self.tmp[ckey]["tiling"] = REGULAR_TILING
+            self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN
             for dim in ["x", "y", "z"]:
                 self.tmp[ckey][f"n_{dim}"] = dims[idx]
                 self.tmp[ckey][f"s_{dim}"] = spc[idx]
@@ -394,6 +397,9 @@ def parse_and_normalize_ebsd_data(self, ckey: str):
             # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere
             if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
                 print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
+            # TODO::all other hfive parsers normalize the scan_point_{dim} arrays into
+            # tiled and repeated coordinate tuples, whereas the code below stores
+            # only the dimension scale axis values!
             for dim in ["x", "y", "z"]:
                 self.tmp[ckey][f"scan_point_{dim}"] \
                     = np.asarray(np.linspace(0, self.tmp[ckey][f"n_{dim}"] - 1,

diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
@@ -38,7 +38,9 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     EBSD_MAP_SPACEGROUP, read_strings_from_dataset, all_equal, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN
+from pynxtools.dataconverter.readers.em.utils.get_scan_points import \
+    get_scan_point_coords
 
 
 class HdfFiveCommunityReader(HdfFiveBaseParser):
@@ -113,6 +115,9 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             self.tmp[ckey]["grid_type"] = SQUARE_GRID
         else:
             raise ValueError(f"Unable to parse {grp_name}/Grid Type !")
+        # the next two lines encode the typical assumption that is not reported in the tech partner file!
+        self.tmp[ckey]["tiling"] = REGULAR_TILING
+        self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN
 
         req_fields = ["NCOLS", "NROWS", "XSTEP", "YSTEP"]
         for req_field in req_fields:
@@ -233,19 +238,20 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
         # TODO::calculation below x/y only valid if self.tmp[ckey]["grid_type"] == SQUARE_GRID
         if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
             print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
-        self.tmp[ckey]["scan_point_x"] \
-            = np.asarray(np.tile(np.linspace(0.,
-                                             self.tmp[ckey]["n_x"] - 1.,
-                                             num=self.tmp[ckey]["n_x"],
-                                             endpoint=True) * self.tmp[ckey]["s_x"],
-                                             self.tmp[ckey]["n_y"]), np.float32)
-        self.tmp[ckey]["scan_point_y"] \
-            = np.asarray(np.repeat(np.linspace(0.,
-                                               self.tmp[ckey]["n_y"] - 1.,
-                                               num=self.tmp[ckey]["n_y"],
-                                               endpoint=True) * self.tmp[ckey]["s_y"],
-                                               self.tmp[ckey]["n_x"]), np.float32)
+        # self.tmp[ckey]["scan_point_x"] \
+        #     = np.asarray(np.tile(np.linspace(0.,
+        #                                      self.tmp[ckey]["n_x"] - 1.,
+        #                                      num=self.tmp[ckey]["n_x"],
+        #                                      endpoint=True) * self.tmp[ckey]["s_x"],
+        #                                      self.tmp[ckey]["n_y"]), np.float32)
+        # self.tmp[ckey]["scan_point_y"] \
+        #     = np.asarray(np.repeat(np.linspace(0.,
+        #                                        self.tmp[ckey]["n_y"] - 1.,
+        #                                        num=self.tmp[ckey]["n_y"],
+        #                                        endpoint=True) * self.tmp[ckey]["s_y"],
+        #                                        self.tmp[ckey]["n_x"]), np.float32)
         # X SAMPLE and Y SAMPLE seem to be something different!
+        get_scan_point_coords(self.tmp[ckey])
 
         # Band Contrast is not stored in Bruker but Radon Quality or MAD
         # but this is s.th. different as it is the mean angular deviation between
@@ -254,3 +260,6 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
             self.tmp[ckey]["mad"] = np.asarray(fp[f"{grp_name}/MAD"][:], np.float32)
         else:
             raise ValueError(f"{grp_name}/MAD has unexpected shape !")
+
+
+
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
@@ -39,7 +39,7 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import EULER_SPACE_SYMMETRY, \
     read_strings_from_dataset, read_first_scalar, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID
+    ASSUME_PHASE_NAME_TO_SPACE_GROUP, HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN
 
 
 class HdfFiveEdaxOimAnalysisReader(HdfFiveBaseParser):
@@ -124,6 +124,10 @@ def parse_and_normalize_group_ebsd_header(self, fp, ckey: str):
             self.tmp[ckey]["grid_type"] = SQUARE_GRID
         else:
             raise ValueError(f"Unable to parse {grp_name}/Grid Type !")
+        # the next two lines encode the typical assumption that is not reported in the tech partner file!
+        self.tmp[ckey]["tiling"] = REGULAR_TILING
+        self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN
+
         self.tmp[ckey]["s_x"] = read_first_scalar(fp[f"{grp_name}/Step X"])
         self.tmp[ckey]["s_unit"] = "um"  # "µm"  # TODO::always micron?
         self.tmp[ckey]["n_x"] = read_first_scalar(fp[f"{grp_name}/nColumns"])
@@ -227,7 +231,9 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
 
         # given no official EDAX OimAnalysis spec we cannot define for sure if
         # phase_id == 0 means just all was indexed with the first/zeroth phase or nothing
-        # was indexed, TODO::assuming it means all indexed with first phase:
+        # was indexed; here we assume it means all points were indexed with the first phase
+        # and that EDAX uses -1 for notIndexed; this assumption is also
+        # substantiated by the situation in the hfive_apex parser
         if np.all(fp[f"{grp_name}/Phase"][:] == 0):
             self.tmp[ckey]["phase_id"] = np.zeros(n_pts, np.int32) + 1
         else:
@@ -265,3 +271,5 @@ def parse_and_normalize_group_ebsd_data(self, fp, ckey: str):
                     fp[f"{grp_name}/X Position"][:] * self.tmp[ckey]["s_x"], np.float32)
             self.tmp[ckey]["scan_point_y"] = np.asarray(
                     fp[f"{grp_name}/Y Position"][:] * self.tmp[ckey]["s_y"], np.float32)
+        # despite differences in reported calibrations, the scan_point_{dim} arrays are
+        # already provided by the tech partner as tiled and repeated coordinates
diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
@@ -39,7 +39,7 @@
 from pynxtools.dataconverter.readers.em.utils.hfive_utils import \
     read_strings_from_dataset, format_euler_parameterization
 from pynxtools.dataconverter.readers.em.examples.ebsd_database import \
-    HEXAGONAL_GRID, SQUARE_GRID
+    HEXAGONAL_GRID, SQUARE_GRID, REGULAR_TILING, FLIGHT_PLAN
 
 
 class HdfFiveOxfordReader(HdfFiveBaseParser):
@@ -123,6 +123,9 @@ def parse_and_normalize_slice_ebsd_header(self, fp, ckey: str):
         # TODO::check if Oxford always uses SquareGrid like assumed here
         self.tmp[ckey]["dimensionality"] = 2
         self.tmp[ckey]["grid_type"] = SQUARE_GRID
+        # the next two lines encode the typical assumption that is not reported in the tech partner file!
+        self.tmp[ckey]["tiling"] = REGULAR_TILING
+        self.tmp[ckey]["flight_plan"] = FLIGHT_PLAN
 
         req_fields = ["X Cells", "Y Cells", "X Step", "Y Step"]
         for req_field in req_fields:
@@ -230,7 +233,7 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
         self.tmp[ckey]["euler"] = format_euler_parameterization(self.tmp[ckey]["euler"])
 
         # Phase, yes, H5T_NATIVE_INT32, (size, 1), Index of phase, 0 if not indexed
-        # no normalization needed, also in NXem_ebsd the null model notIndexed is phase_identifier 0
+        # no normalization needed, also in NXem the null model notIndexed is phase_identifier 0
         self.tmp[ckey]["phase_id"] = np.asarray(fp[f"{grp_name}/Phase"], np.int32)
 
         # normalize pixel coordinates to physical positions even though the origin can still dangle somewhere
@@ -240,6 +243,7 @@ def parse_and_normalize_slice_ebsd_data(self, fp, ckey: str):
         if self.tmp[ckey]["grid_type"] != SQUARE_GRID:
             print(f"WARNING: Check carefully correct interpretation of scan_point coords!")
         # X, no, H5T_NATIVE_FLOAT, (size, 1), X position of each pixel in micrometers (origin: top left corner)
+        # for Oxford Instruments this is already the required tiled and repeated array of shape (size, 1)
         self.tmp[ckey]["scan_point_x"] = np.asarray(fp[f"{grp_name}/X"], np.float32)
         # inconsistency f32 in file although specification states float
 

diff --git a/pyxem.batch.sh b/pyxem.batch.sh
@@ -32,7 +32,7 @@ examples="207_2081.edaxh5"
 # examples="229_2097.oh5"
 # examples="067_0003.dream3d SmallIN100_Final.dream3d 244_0014.dream3d"
 # examples="244_0014.dream3d"
-# examples="SmallIN100_Final.dream3d"
+examples="SmallIN100_Final.dream3d"
 # examples="067_0003.dream3d"  # very large 3D EBSD takes ~40GB RAM for processing
 
 for example in $examples; do