Skip to content

Commit

Permalink
Tested auto-detection of relevant group to harvest 3D EBSD data from …
Browse files Browse the repository at this point in the history
…and ran successfully on all datasets, 3/13 contained relevant content
  • Loading branch information
mkuehbach committed Nov 7, 2023
1 parent acf860d commit e16d8ea
Show file tree
Hide file tree
Showing 3 changed files with 87 additions and 10 deletions.
2 changes: 1 addition & 1 deletion pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ def read(self,
sub_parser = "nxs_hfive"
subparser = NxEmNxsHfiveSubParser(entry_id, file_paths[0])
subparser.parse(template)
# exit(1)
exit(1)

# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
Expand Down
80 changes: 72 additions & 8 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_dreamthreed.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,47 +115,107 @@ def check_if_supported(self):
with h5py.File(self.file_path, "r") as h5r:
if len(h5r["/"].attrs.keys()) < 2:
self.supported = False
print("Not enough attrs")
return
req_fields = ["DREAM3D Version", "FileVersion"]
for req_field in req_fields:
if f"{req_field}" not in h5r["/"].attrs.keys():
self.supported = False
print(f"{req_field} not proper!")
return
print(read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"]))
if read_strings_from_dataset(h5r["/"].attrs["DREAM3D Version"]) in self.supported_version["writer_version"]:
self.supported += 1
print(read_strings_from_dataset(h5r["/"].attrs["FileVersion"]))
if read_strings_from_dataset(h5r["/"].attrs["FileVersion"]) in self.supported_version["schema_version"]:
self.supported += 1
print(f"{self.supported}")

if self.supported == 2:
self.supported = True
self.version = self.supported_version.copy()
else:
print("Some other!")
self.supported = False

def search_normalizable_content(self):
    """Check if that highly customizable DREAM3D file has here supported content.

    Inspects the node paths stored as keys of ``self.datasets`` (filled by
    ``get_content()`` of the parent class) and reports, via ``print``, whether
    the file holds a 3D EBSD reconstruction that this parser can pick up.
    Nothing is returned or stored on the instance.

    The heuristic requires, in this order:

    1. a group ``<head>/_SIMPL_GEOMETRY`` with the datasets ``DIMENSIONS``,
       ``ORIGIN`` and ``SPACING``;
    2. a dataset ``EulerAngles`` somewhere below ``<head>`` whose shape entry
       is a 4-tuple ``(i, j, k, 3)``;
    3. in the same group as ``EulerAngles`` one quality map (``BC``,
       ``Band Contrast``, ``CI``, ``Confidence Index`` or ``MAD``) and a
       ``Phases`` dataset, both 4d with matching ``(i, j, k)``;
    4. ``CrystalStructures``, ``LatticeConstants`` and ``MaterialName`` below
       ``<head>/Phase Data`` or ``<head>/CellEnsembleData``.

    NOTE(review): index 2 of each ``self.datasets`` value is used as the
    dataset shape - confirm against the parent class that fills the dict.
    """
    super().open()
    super().get_content()
    # super().report_content()  # verbose content dump, disabled on purpose
    super().close()

    # the logic to find if there is at all a 3D EBSD reconstruction in it
    # search for a node named _SIMPL_GEOMETRY
    # which has childs "DIMENSIONS, ORIGIN, SPACING"
    geometry = "/_SIMPL_GEOMETRY"
    target_path = None
    for hdf_node_path in self.datasets:
        idx = hdf_node_path.find(geometry)
        if idx < 0:
            continue
        head = hdf_node_path[:idx]
        if all(f"{head}{geometry}/{req_field}" in self.datasets
               for req_field in ("DIMENSIONS", "ORIGIN", "SPACING")):
            # the first complete geometry group wins, all its datasets
            # share the same head so there is no point in looking further
            target_path = head
            break
    if target_path is None:
        return

    # that node has one sibling node called CellData
    # which has a dset named EulerAngles shape 4d, (i, j, k, 3) +
    euler = "/EulerAngles"
    found = 0
    i_j_k = (None, None, None)
    group_name = None
    for entry, info in self.datasets.items():
        # match on full path components so that neither a sibling group
        # sharing a name prefix nor a dataset like "FooEulerAngles" is taken
        if not (entry.startswith(f"{target_path}/") and entry.endswith(euler)):
            continue
        group_name = entry[:-len(euler)]  # parent group without trailing fwslash
        shp = info[2]
        if isinstance(shp, tuple) and len(shp) == 4 and shp[3] == 3:
            i_j_k = shp[:3]
            found += 1
            break
    if group_name is None:
        return

    def on_same_grid(path):
        """Tell if path is a 4d dataset spanning the same (i, j, k) voxel grid."""
        if path not in self.datasets:
            return False
        shp = self.datasets[path][2]
        return isinstance(shp, tuple) and len(shp) == 4 and shp[:3] == i_j_k

    # which has a dset named BC or CI or MAD shape 4d (i, j, k, 1) +
    for key in ("BC", "Band Contrast", "CI", "Confidence Index", "MAD"):
        if on_same_grid(f"{group_name}/{key}"):
            # one quality map suffices
            found += 1
            break
    # which has a dset named Phases shape 4d (i, j, k, 1) +
    if on_same_grid(f"{group_name}/Phases"):
        found += 1
    if found != 3:
        return

    # that node has one sibling node called Phase Data
    # which has a dset named CrystalStructures, LatticeConstants, MaterialName
    req_fields = ("CrystalStructures", "LatticeConstants", "MaterialName")
    possible_locs = ("Phase Data", "CellEnsembleData")
    # TODO::these group names were found in the examples but likely they can
    # be changed depending on how the filters are set
    # every required field counts once, no matter in how many of the possible
    # locations it shows up (which should also have specific shape)
    has_phase_metadata = all(
        any(f"{target_path}/{loc}/{req_field}" in self.datasets
            for loc in possible_locs)
        for req_field in req_fields
    )
    if has_phase_metadata:
        print("Relevant 3D EBSD content found")
        print(f"{target_path}")
        print(f"{group_name}")
        return
    print("No relevant 3D EBSD content found!")

# but see if that logic does not also check the shape and numerical content
# there are still possibilities where this logic fails to detect a concept
# reliably, this shows clearly that documenting and offering versioned description
Expand All @@ -164,6 +224,10 @@ def search_normalizable_content(self):
# is understood before being normalized so that results in the RDMS are really
# useful and comparable

# this is one approach to finding the relevant groups;
# another would be to actually interpret the applied filters and hunt
# for the output within the parameters of a specific filter

def parse_and_normalize(self):
"""Read and normalize away community-specific formatting with an equivalent in NXem."""
self.search_normalizable_content()
Expand Down
15 changes: 14 additions & 1 deletion test.ebsd3d_hdf5.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,20 @@

Examples="067_0003 177_0004 177_0005 177_0006 177_0007 177_0008 177_0009 226_0010 226_0011 226_0012 226_0013 244_0014 SmallIN100_Final"

Examples="SmallIN100_Final"
# skip
# 177_0007 as it is one of the weird examples where the h5py library cannot traverse the content... let's not follow-up on this rabbit hole right now
# 177_0004 has only vertices
# 177_0005 has only edges
# 177_0006 has only surface facets
# 177_0008 is out because it uses the old 6.0 format, which does not store DIMENSIONS, ORIGIN, SPACING under _SIMPL_GEOMETRY yet
# 177_0009 follows the new structure but has no EulerAngles, only Phases; without yet another logic that follows the source of the
# respective filter we have no chance to find the orientation data
# 226_0010 and _0011 are out because they do have only plain images (backscattered electron likely)
# 226_0013 is out because it has only plain optical image data no EBSD
# 244_0014 is out because it has no quantity (band contrast, confidence index, or MAD) from which to generate a default plot


# Examples="SmallIN100_Final"
for example in $Examples; do
echo $example
dataconverter --reader em --nxdl NXroot --input-file $example.dream3d --output debug.$example.dream3d.nxs 1>stdout.$example.dream3d.nxs.txt 2>stderr.$example.dream3d.nxs.txt
Expand Down

0 comments on commit e16d8ea

Please sign in to comment.