diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh index 1d2c5f660..949de3dfb 100755 --- a/debug/spctrscpy.batch.sh +++ b/debug/spctrscpy.batch.sh @@ -2,7 +2,7 @@ datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/" datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/" - +datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/" # apex examples ikz, pdi # examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina" @@ -11,8 +11,11 @@ examples="AlGaO.nxs" examples="GeSi.nxs" examples="GeSn_13.nxs" # examples="VInP_108_L2.h5" +examples="CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd" +examples="CG71113 1138 Ceta 660 mm Camera.emd" +examples="CG71113 1125 Ceta 1.1 Mx Camera.emd" -for example in $examples; do +for example in "$examples"; do echo $example - dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt + dataconverter --reader em --nxdl NXroot --input-file "$datasource$example" --output "debug.$example.nxs" 1>"stdout.$example.nxs.txt" 2>"stderr.$example.nxs.txt" done diff --git a/pynxtools/dataconverter/readers/em/reader.py b/pynxtools/dataconverter/readers/em/reader.py index c6d97b16a..72c33bd5e 100644 --- a/pynxtools/dataconverter/readers/em/reader.py +++ b/pynxtools/dataconverter/readers/em/reader.py @@ -27,7 +27,7 @@ from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser # from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser # from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser -# from 
pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser +from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver # from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper @@ -121,21 +121,23 @@ def read(self, # add further with resolving cases # if file_path is an HDF5 will use hfive parser # sub_parser = "nxs_pyxem" - subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) - subparser.parse(template) + # subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0]) + # subparser.parse(template) # TODO::check correct loop through! # sub_parser = "image_tiff" # subparser = NxEmImagesSubParser(entry_id, file_paths[0]) # subparser.parse(template) + # TODO::check correct loop through! # sub_parser = "zipped_nion_project" # subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0]) # subparser.parse(template, verbose=True) + # TODO::check correct loop through! 
# sub_parser = "velox_emd" - # subparser = RsciioVeloxSubParser(entry_id, file_paths[0]) - # subparser.parse(template, verbose=True) + subparser = RsciioVeloxSubParser(entry_id, file_paths[0]) + subparser.parse(template, verbose=True) # for dat_instance in case.dat_parser_type: # print(f"Process pieces of information in {dat_instance} tech partner file...") diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py index 4c310002b..2eabd2ea2 100644 --- a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py +++ b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py @@ -17,10 +17,17 @@ # """(Sub-)parser for reading content from ThermoFisher Velox *.emd (HDF5) via rosettasciio.""" +import flatdict as fd +import numpy as np + from typing import Dict, List from rsciio import emd from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser +from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \ + import get_named_axis, get_axes_dims, get_axes_units +from pynxtools.dataconverter.readers.shared.shared_utils \ + import get_sha256_of_file_content class RsciioVeloxSubParser(RsciioBaseParser): @@ -31,8 +38,11 @@ def __init__(self, entry_id: int = 1, file_path: str = ""): self.entry_id = entry_id else: self.entry_id = 1 - self.id_mgn: Dict = {} - self.prfx = None + self.id_mgn: Dict = {"event": 1, + "event_img": 1, + "event_spc": 1, + "roi": 1} + self.file_path_sha256 = None self.tmp: Dict = {} self.supported_version: Dict = {} self.version: Dict = {} @@ -47,11 +57,15 @@ def check_if_supported(self): # only the collection of the concepts without the actual instance data # based on this one could then plan how much memory has to be reserved # in the template and stream out accordingly + with open(self.file_path, "rb", 0) as fp: + self.file_path_sha256 = get_sha256_of_file_content(fp) + + print(f"Parsing {self.file_path} with SHA256 
{self.file_path_sha256} ...") self.supported = True except IOError: print(f"Loading {self.file_path} using {self.__name__} is not supported !") - def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: + def parse(self, template: dict, verbose=False) -> dict: """Perform actual parsing filling cache self.tmp.""" if self.supported is True: self.tech_partner_to_nexus_normalization(template) @@ -62,20 +76,196 @@ def parse_and_normalize_and_process_into_template(self, template: dict) -> dict: def tech_partner_to_nexus_normalization(self, template: dict) -> dict: """Translate tech partner concepts to NeXus concepts.""" - self.normalize_bfdf_content(template) # conventional bright/dark field - self.normalize_adf_content(template) # (high-angle) annular dark field - self.normalize_edxs_content(template) # EDS in the TEM - self.normalize_eels_content(template) # electron energy loss spectroscopy + reqs = ["data", "axes", "metadata", "original_metadata", "mapping"] + for idx, obj in enumerate(self.objs): + if not isinstance(obj, dict): + continue + parse = True + for req in reqs: + if req not in obj: + parse = False + if parse == False: + continue + + content_type = self.content_resolver(obj) + print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}") + if content_type == "imgs": + self.normalize_imgs_content(obj, template) # generic imaging modes + # TODO:: could later make an own one for bright/dark field, but + # currently no distinction in hyperspy + elif content_type == "adf": + self.normalize_adf_content(obj, template) # (high-angle) annular dark field + elif content_type == "diff": # diffraction image in reciprocal space + self.normalize_diff_content(obj, template) # diffraction images + elif content_type == "eds": + self.normalize_eds_content(obj,template) # ED(X)S in the TEM + elif content_type == "eels": + self.normalize_eels_content(obj, template) # electron energy loss spectroscopy + else: # == "n/a" + 
print(f"WARNING::Unable to resolve content of {idx}-th object in {self.file_path}!") return template - def normalize_bfdf_content(self, template: dict) -> dict: + def content_resolver(self, obj: dict) -> str: + """Try to identify which content the obj describes best.""" + # assume rosettasciio-specific formatting of the emd parser + # i.e. a dictionary with the following keys: + # "data", "axes", "metadata", "original_metadata", "mapping" + meta = fd.FlatDict(obj["metadata"], "/") + orgmeta = fd.FlatDict(obj["original_metadata"], "/") + dims = get_axes_dims(obj["axes"]) + units = get_axes_units(obj["axes"]) + if "General/title" not in meta.keys(): + return "n/a" + if (meta["General/title"] in ("BF")) or (meta["General/title"].startswith("DF")): + # TODO::the problem with using here the explicit name DF4 is that this may only + # work for a particular microscope: + # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07 + # Instrument/ControlSoftwareVersion: 1.15.4, Instrument/Manufacturer: FEI Company + # Instrument/InstrumentId: 6338, Instrument/InstrumentModel: Talos F200X + # instead there should be a logic added which resolves which concept + # the data in this obj are best described by when asking a community-wide + # glossary but not the FEI-specific glossary + # all that logic is unneeded and thereby the data more interoperable + # if FEI would harmonize their obvious company metadata standard with the + # electron microscopy community! + return "imgs" + if meta["General/title"] in ("HAADF"): + return "adf" + # all units indicating we are in real or complex i.e. 
reciprocal space + vote_r_c = [0, 0] # real space, complex space + for unit in units: + if unit.startswith("1 /"): + vote_r_c[1] += 1 + else: + vote_r_c[0] += 1 + if vote_r_c[0] == len(units) and vote_r_c[1] == 0: + return "imgs" + if vote_r_c[0] == 0 and vote_r_c[1] == len(units): + return "diff" + del vote_r_c + return "n/a" + + def normalize_imgs_content(self, obj: dict, template: dict) -> dict: + """Map generic scanned images (e.g. BF/DF) to NeXus.""" + meta = fd.FlatDict(obj["metadata"], "/") + dims = get_axes_dims(obj["axes"]) + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" + template[f"{trg}/PROCESS[process]/source/type"] = "file" + template[f"{trg}/PROCESS[process]/source/path"] = self.file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 + template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256" + template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"] + template[f"{trg}/image_twod/@NX_class"] = "NXdata" # TODO::writer should do! 
+ template[f"{trg}/image_twod/@signal"] = "intensity" + template[f"{trg}/image_twod/@axes"] = [] + for dim in dims: + template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}") + template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] \ + = np.uint32(dim[1]) + support, unit = get_named_axis(obj["axes"], dim[0]) + if support is not None and unit is not None: + template[f"{trg}/image_twod/axis_{dim[0]}"] \ + = {"compress": support, "strength": 1} + template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \ + = f"{dim[0]}-axis position ({unit})" + template[f"{trg}/image_twod/title"] = meta["General/title"] + template[f"{trg}/image_twod/intensity"] \ + = {"compress": np.asarray(obj["data"]), "strength": 1} + # template[f"{trg}/image_twod/intensity/@units"] + # TODO::add metadata + self.id_mgn["event_img"] += 1 + self.id_mgn["event"] += 1 return template - def normalize_adf_content(self, template: dict) -> dict: + def normalize_adf_content(self, obj: dict, template: dict) -> dict: + """Map relevant (high-angle) annular dark field images to NeXus.""" + meta = fd.FlatDict(obj["metadata"], "/") + dims = get_axes_dims(obj["axes"]) + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]" + template[f"{trg}/PROCESS[process]/source/type"] = "file" + template[f"{trg}/PROCESS[process]/source/path"] = self.file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 + template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256" + template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"] + template[f"{trg}/image_twod/@NX_class"] = "NXdata" # TODO::writer should do! 
+ template[f"{trg}/image_twod/@signal"] = "intensity" + template[f"{trg}/image_twod/@axes"] = [] + for dim in dims: + template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}") + template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}]"] \ + = np.uint32(dim[1]) + support, unit = get_named_axis(obj["axes"], dim[0]) + if support is not None and unit is not None: + template[f"{trg}/image_twod/axis_{dim[0]}"] \ + = {"compress": support, "strength": 1} + template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \ + = f"{dim[0]}-axis position ({unit})" + template[f"{trg}/image_twod/title"] = meta["General/title"] + template[f"{trg}/image_twod/intensity"] \ + = {"compress": np.asarray(obj["data"]), "strength": 1} + # template[f"{trg}/image_twod/intensity/@units"] + # TODO::coll. angles given in original_metadata map to half_angle_interval + # TODO::add metadata + self.id_mgn["event_img"] += 1 + self.id_mgn["event"] += 1 + return template + + def normalize_diff_content(self, obj: dict, template: dict) -> dict: + """Map relevant diffraction images to NeXus.""" + # TODO::the above-mentioned constraint is not general enough + # this can work only for cases where we know that we not only have a + # Ceta camera but also use it for taking diffraction pattern + # TODO::this is an example that more logic is needed to identify whether + # the information inside obj really has a similarity with the concept of + # somebody having taken a diffraction image + # one can compare the situation with the following: + # assume you wish to take pictures of apples and have an NXapple_picture + # but all you get is an image from a digital camera where the dataset is + # named maybe DCIM, without a logic one cannot make the mapping robustly! 
+ # can one map y, x, on j, i indices + idx_map = {"y": "j", "x": "i"} + meta = fd.FlatDict(obj["metadata"], "/") + dims = get_axes_dims(obj["axes"]) + for dim in dims: + if dim[0] not in idx_map.keys(): + raise ValueError(f"Unable to map index {dim[0]} on something!") + + trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \ + f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \ + f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]" + template[f"{trg}/PROCESS[process]/source/type"] = "file" + template[f"{trg}/PROCESS[process]/source/path"] = self.file_path + template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256 + template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256" + template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"] + template[f"{trg}/image_twod/@NX_class"] = "NXdata" # TODO::writer should do! + template[f"{trg}/image_twod/@signal"] = "magnitude" + template[f"{trg}/image_twod/@axes"] = [] + for dim in dims: + template[f"{trg}/image_twod/@axes"].append(f"axis_{idx_map[dim[0]]}") + template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] \ + = np.uint32(dim[1]) + support, unit = get_named_axis(obj["axes"], dim[0]) + if support is not None and unit is not None and unit.startswith("1 /"): + template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] \ + = {"compress": support, "strength": 1} + template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] \ + = f"{idx_map[dim[0]]}-axis position ({unit})" + template[f"{trg}/image_twod/title"] = meta["General/title"] + template[f"{trg}/image_twod/magnitude"] \ + = {"compress": np.asarray(obj["data"]), "strength": 1} + # template[f"{trg}/image_twod/magnitude/@units"] + # TODO::add metadata + self.id_mgn["event_img"] += 1 + self.id_mgn["event"] += 1 return template - def normalize_edxs_content(self, template: dict) -> dict: + def normalize_eds_content(self, obj: dict, template: dict) -> dict: + return template - def normalize_eels_content(self, template: dict)
-> dict: + def normalize_eels_content(self, obj: dict, template: dict) -> dict: return template diff --git a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py new file mode 100644 index 000000000..b4aeaeaa4 --- /dev/null +++ b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py @@ -0,0 +1,68 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +"""Utility functions to interpret data from hyperspy-project-specific representation.""" + +import numpy as np + + +def get_named_axis(axes_metadata, dim_name): + """Return numpy array with tuple (axis pos, unit) along dim_name or None.""" + retval = None + if len(axes_metadata) >= 1: + for axis in axes_metadata: + if isinstance(axis, dict): + if ("name" in axis): + if axis["name"] == dim_name: + reqs = ["index_in_array", "offset", "scale", "size", "units", "navigate"] # "name" + for req in reqs: + if req not in axis: + raise ValueError(f"{req} not in {axis}!") + retval = ( + np.asarray( + axis["offset"] + (np.linspace(0., + axis["size"] - 1., + num=int(axis["size"]), + endpoint=True) + * axis["scale"]), + np.float64), + axis["units"]) + return retval + + +def get_axes_dims(axes_metadata): + """Return list of (axis) name, index_in_array tuple or empty list.""" + retval = [] + if len(axes_metadata) >= 1: + for axis in axes_metadata: + if isinstance(axis, dict): + if ("name" in axis) and ("index_in_array" in axis): + retval.append((axis["name"], axis["index_in_array"])) + # TODO::it seems that hyperspy sorts this by index_in_array + return retval + + +def get_axes_units(axes_metadata): + """Return list of units or empty list.""" + retval = [] + if len(axes_metadata) >= 1: + for axis in axes_metadata: + if isinstance(axis, dict): + if "units" in axis: + retval.append(axis["units"]) + # TODO::it seems that hyperspy sorts this by index_in_array + return retval