From e16d8eae396c1dc6ac28e72f78e093913ec87ffa Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 7 Nov 2023 17:34:04 +0100 Subject: [PATCH] Tested auto-detection of relevant group to harvest 3D EBSD data from and ran successfully on all datasets, 3/13 contained relevant content --- pynxtools/dataconverter/readers/em/reader.py | 2 +- .../em/subparsers/hfive_dreamthreed.py | 80 +++++++++++++++++-- test.ebsd3d_hdf5.sh | 15 +++- 3 files changed, 87 insertions(+), 10 deletions(-) diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index 7f2d15aaa..57e5acfc4 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -130,7 +130,7 @@ def read(self, sub_parser = "nxs_hfive" subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0]) subparser.parse(template) - # exit(1) + exit(1) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py index 3eda21871..b2cc382e1 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py +++ b/pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py @@ -115,47 +115,107 @@ def check_if_supported(self): with h5py.File(self.file_path, "r") as h5r: if len(h5r["/"].attrs.keys()) < 2: self.supported = False - print("Not enough attrs") return req_fields = ["DREAM3D Version", "FileVersion"] for req_field in req_fields: if f"{req_field}" not in h5r["/"].attrs.keys(): self.supported = False - print(f"{req_field} not proper!") return - print(read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"])) if read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"]) in self.supported_version["writer_version"]: self.supported += 1 - print(read_strings_from_dataset(h5r["/"].attrs["FileVersion"])) if 
read_strings_from_dataset(h5r["/"].attrs["FileVersion"]) in self.supported_version["schema_version"]: self.supported += 1 - print(f"{self.supported}") if self.supported == 2: self.supported = True self.version = self.supported_version.copy() else: - print("Some other!") self.supported = False def search_normalizable_content(self): """Check if that highly customizable DREAM3D file has here supported content.""" super().open() super().get_content() - super().report_content() + # super().report_content() super().close() # the logic to find if there is at all a 3D EBSD reconstruction in it # search for a node: + target_path = [] # named _SIMPL_GEOMETRY + candidate_paths = [] + for hdf_node_path in self.datasets.keys(): + idx = hdf_node_path.find("/_SIMPL_GEOMETRY") + if idx > -1: + candidate_paths.append((hdf_node_path, idx)) # which has childs "DIMENSIONS, ORIGIN, SPACING" + for path_idx in candidate_paths: + head = path_idx[0][0:path_idx[1]] + tail = path_idx[0][path_idx[1]:] + found = 0 + req_fields = ["DIMENSIONS", "ORIGIN", "SPACING"] + for req_field in req_fields: + if f"{head}/_SIMPL_GEOMETRY/{req_field}" in self.datasets.keys(): + found += 1 + if found == 3: + target_path.append(head) + break + del candidate_paths # if only one such node found parse only if + if len(target_path) != 1: + return + else: + target_path = target_path[0] # that node has one sibling node called CellData - # which has a group of named EulerAngles shape 4d, (i, j, k, 1) + + found = 0 + i_j_k = (None, None, None) + group_name = None + for entry in self.datasets.keys(): + if entry.startswith(f"{target_path}") is True and entry.endswith(f"EulerAngles") is True: + group_name = entry[0:-12] # removing the trailing fwslash + # which has a dset of named EulerAngles shape 4d, (i, j, k, 1) + + shp = self.datasets[entry][2] + if isinstance(shp, tuple) and len(shp) == 4: + if shp[3] == 3: + i_j_k = (shp[0], shp[1], shp[2]) + found += 1 + break + if group_name is None: + return # which has a dset 
named BC or CI or MAD shape 4d (i, j, k, 1) + + one_key_required = ["BC", "Band Contrast", "CI", "Confidence Index", "MAD"] + for key in one_key_required: + if f"{group_name}/{key}" in self.datasets.keys(): + shp = self.datasets[f"{group_name}/{key}"][2] + if isinstance(shp, tuple) and len(shp) == 4: + if (shp[0], shp[1], shp[2]) == i_j_k: + found += 1 + break # which has a dset named Phases shape 4d (i, j, k, 1) + + if f"{group_name}/Phases" in self.datasets.keys(): + shp = self.datasets[f"{group_name}/Phases"][2] + if isinstance(shp, tuple) and len(shp) == 4: + if (shp[0], shp[1], shp[2]) == i_j_k: + found += 1 # that node has one sibling node called Phase Data + if found != 3: + return # which has a dset named CrystalStructures, LatticeConstants, MaterialName + req_fields = ["CrystalStructures", "LatticeConstants", "MaterialName"] + found = 0 + possible_locs = ["Phase Data", "CellEnsembleData"] + # TODO::these group names were found in the examples but likely they can be changed depending on how the filters are set + for req_field in req_fields: + for loc in possible_locs: + if f"{target_path}/{loc}/{req_field}" in self.datasets.keys(): # (which should also have specific shape) + found += 1 + if found != 3: + print(f"Relevant 3D EBSD content found") + print(f"{target_path}") + print(f"{group_name}") + return + print(f"No relevant 3D EBSD content found!") + # but see if that logic does not also check the shape and numerical content # there are still possibilities where this logic fails to detect a concept # reliably, this shows clearly that documenting and offering versioned description @@ -164,6 +224,10 @@ def search_normalizable_content(self): # is understood before being normalized so that results in the RDMS are really # useful and comparable + # this is one approach how to find relevant groups + # another would be to interpret really the filters applied and hunt + # for the output within the parameters of a specific filter + def parse_and_normalize(self): 
"""Read and normalize away community-specific formatting with an equivalent in NXem.""" self.search_normalizable_content() diff --git a/test.ebsd3d_hdf5.sh b/test.ebsd3d_hdf5.sh index 9d72ae40d..ad55a2290 100755 --- a/test.ebsd3d_hdf5.sh +++ b/test.ebsd3d_hdf5.sh @@ -2,7 +2,20 @@ Examples="067_0003 177_0004 177_0005 177_0006 177_0007 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final" -Examples="SmallIN100_Final" +# skip +# 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... let's not follow up on this rabbit hole right now +# 177_0004 has only vertices +# 177_0005 has only edges +# 177_0006 has only surface facets +# 177_0008 is out because it uses the old 6.0 format, which does not yet store DIMENSIONS, ORIGIN, SPACING under _SIMPL_GEOMETRY +# 177_0009 follows the new structure but has no EulerAngles, only Phases; thus, without yet another logic that follows the source of the +# respective filter, we have no chance to find the orientation data +# 226_0010 and _0011 are out because they have only plain images (likely backscattered electron) +# 226_0013 is out because it has only plain optical image data, no EBSD +# 244_0014 is out because it does not have any quantity such as band contrast, confidence index, or MAD from which to generate a default plot + + +# Examples="SmallIN100_Final" for example in $Examples; do echo $example dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt