Skip to content

Commit

Permalink
Added first version of an imaging mode case distinction logic, tested …
Browse files Browse the repository at this point in the history
…imgs, adf, and ceta imaging modes. NeXus files were generated successfully but weird h5web display error coming up within ipynb, removechild tested if spaces in filenames cause this but no, in hdfviewer file shows without any issues
  • Loading branch information
atomprobe-tc committed Jan 17, 2024
1 parent 4a9de81 commit 3892c70
Show file tree
Hide file tree
Showing 4 changed files with 282 additions and 19 deletions.
9 changes: 6 additions & 3 deletions debug/spctrscpy.batch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/"
datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/ikz/"

datasource="../../../../../paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/fhi/"

# apex examples ikz, pdi
# examples="ikz/VInP_108_L2.h5 ikz/GeSn_13.h5 pynx/46_ES-LP_L1_brg.bcf pynx/1613_Si_HAADF_610_kx.emd pynx/EELS_map_2_ROI_1_location_4.dm3 pynx/H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina"
Expand All @@ -11,8 +11,11 @@ examples="AlGaO.nxs"
examples="GeSi.nxs"
examples="GeSn_13.nxs"
# examples="VInP_108_L2.h5"
examples="CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd"
examples="CG71113 1138 Ceta 660 mm Camera.emd"
examples="CG71113 1125 Ceta 1.1 Mx Camera.emd"

for example in $examples; do
for example in "$examples"; do
echo $example
dataconverter --reader em --nxdl NXroot --input-file $datasource$example --output debug.$example.nxs 1>stdout.$example.nxs.txt 2>stderr.$example.nxs.txt
dataconverter --reader em --nxdl NXroot --input-file "$datasource$example" --output "debug.$example.nxs" 1>"stdout.$example.nxs.txt" 2>"stderr.$example.nxs.txt"
done
12 changes: 7 additions & 5 deletions pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser
# from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser
# from pynxtools.dataconverter.readers.em.subparsers.nxs_nion import NxEmZippedNionProjectSubParser
# from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser
from pynxtools.dataconverter.readers.em.subparsers.rsciio_velox import RsciioVeloxSubParser
from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver
# from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper

Expand Down Expand Up @@ -121,21 +121,23 @@ def read(self,
# add further with resolving cases
# if file_path is an HDF5 will use hfive parser
# sub_parser = "nxs_pyxem"
subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
subparser.parse(template)
# subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
# subparser.parse(template)
# TODO::check correct loop through!

# sub_parser = "image_tiff"
# subparser = NxEmImagesSubParser(entry_id, file_paths[0])
# subparser.parse(template)
# TODO::check correct loop through!

# sub_parser = "zipped_nion_project"
# subparser = NxEmZippedNionProjectSubParser(entry_id, file_paths[0])
# subparser.parse(template, verbose=True)
# TODO::check correct loop through!

# sub_parser = "velox_emd"
# subparser = RsciioVeloxSubParser(entry_id, file_paths[0])
# subparser.parse(template, verbose=True)
subparser = RsciioVeloxSubParser(entry_id, file_paths[0])
subparser.parse(template, verbose=True)

# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
Expand Down
212 changes: 201 additions & 11 deletions pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,17 @@
#
"""(Sub-)parser for reading content from ThermoFisher Velox *.emd (HDF5) via rosettasciio."""

import flatdict as fd
import numpy as np

from typing import Dict, List
from rsciio import emd

from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser
from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \
import get_named_axis, get_axes_dims, get_axes_units
from pynxtools.dataconverter.readers.shared.shared_utils \
import get_sha256_of_file_content


class RsciioVeloxSubParser(RsciioBaseParser):
Expand All @@ -31,8 +38,11 @@ def __init__(self, entry_id: int = 1, file_path: str = ""):
self.entry_id = entry_id
else:
self.entry_id = 1
self.id_mgn: Dict = {}
self.prfx = None
self.id_mgn: Dict = {"event": 1,
"event_img": 1,
"event_spc": 1,
"roi": 1}
self.file_path_sha256 = None
self.tmp: Dict = {}
self.supported_version: Dict = {}
self.version: Dict = {}
Expand All @@ -47,11 +57,15 @@ def check_if_supported(self):
# only the collection of the concepts without the actual instance data
# based on this one could then plan how much memory has to be reserved
# in the template and stream out accordingly
with open(self.file_path, "rb", 0) as fp:
self.file_path_sha256 = get_sha256_of_file_content(fp)

print(f"Parsing {self.file_path} with SHA256 {self.file_path_sha256} ...")
self.supported = True
except IOError:
print(f"Loading {self.file_path} using {self.__name__} is not supported !")

def parse_and_normalize_and_process_into_template(self, template: dict) -> dict:
def parse(self, template: dict, verbose=False) -> dict:
"""Perform actual parsing filling cache self.tmp."""
if self.supported is True:
self.tech_partner_to_nexus_normalization(template)
Expand All @@ -62,20 +76,196 @@ def parse_and_normalize_and_process_into_template(self, template: dict) -> dict:

def tech_partner_to_nexus_normalization(self, template: dict) -> dict:
    """Translate tech partner concepts to NeXus concepts.

    Walks over all entries of self.objs, skips those that are not
    dictionaries or that lack any of the keys "data", "axes", "metadata",
    "original_metadata", "mapping", asks content_resolver which kind of
    content an entry holds and hands it to the matching normalize_* method.

    Parameters
    ----------
    template : dict
        Template that the normalize_* methods write into.

    Returns
    -------
    dict
        The same template object that was passed in.
    """
    # The previous unconditional calls of the one-argument normalize_* methods
    # are gone: every normalize_* method now takes (obj, template) and is
    # dispatched per object below.
    reqs = ("data", "axes", "metadata", "original_metadata", "mapping")
    for idx, obj in enumerate(self.objs):
        if not isinstance(obj, dict):
            continue
        if not all(req in obj for req in reqs):
            continue

        content_type = self.content_resolver(obj)
        print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}")
        if content_type == "imgs":
            self.normalize_imgs_content(obj, template)  # generic imaging modes
            # TODO:: could later make an own one for bright/dark field, but
            # currently no distinction in hyperspy
        elif content_type == "adf":
            self.normalize_adf_content(obj, template)  # (high-angle) annular dark field
        elif content_type == "diff":  # diffraction image in reciprocal space
            self.normalize_diff_content(obj, template)  # diffraction images
        elif content_type == "eds":
            self.normalize_eds_content(obj, template)  # ED(X)S in the TEM
        elif content_type == "eels":
            self.normalize_eels_content(obj, template)  # electron energy loss spectroscopy
        else:  # == "n/a"
            print(f"WARNING::Unable to resolve content of {idx}-th object in {self.file_path}!")
    return template

def normalize_bfdf_content(self, template: dict) -> dict:
def content_resolver(self, obj: dict) -> str:
    """Try to identify which content the obj describes best.

    Assumes rosettasciio-specific formatting of the emd parser, i.e. obj is
    a dictionary with the keys "data", "axes", "metadata",
    "original_metadata", "mapping"; only "metadata" and "axes" are read here.

    Parameters
    ----------
    obj : dict
        One object as handed over by the caller.

    Returns
    -------
    str
        "imgs" if the title is exactly "BF" or starts with "DF", or if no
        axis unit starts with "1 /" (this includes the case of no units at
        all); "adf" if the title is exactly "HAADF"; "diff" if every axis
        unit starts with "1 /"; "n/a" if there is no "General/title" entry
        or the axis units are a mixture of both kinds.
    """
    meta = fd.FlatDict(obj["metadata"], "/")
    units = get_axes_units(obj["axes"])
    if "General/title" not in meta.keys():
        return "n/a"
    title = meta["General/title"]
    # Compare for equality: ("BF") is a plain string, not a tuple, so the former
    # `title in ("BF")` was a substring test that also accepted "", "B" and "F".
    if title == "BF" or title.startswith("DF"):
        # TODO::the problem with using here the explicit name DF4 is that this may only
        # work for a particular microscope:
        # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07
        # Instrument/ControlSoftwareVersion: 1.15.4, Instrument/Manufacturer: FEI Company
        # Instrument/InstrumentId: 6338, Instrument/InstrumentModel: Talos F200X
        # instead there should be a logic added which resolves which concept
        # the data in this obj are best described by when asking a community-wide
        # glossary but not the FEI-specific glossary
        # all that logic is unneeded and thereby the data more interoperable
        # if FEI would harmonize their obvious company metadata standard with the
        # electron microscopy community!
        return "imgs"
    # same reasoning as above: `title in ("HAADF")` also matched e.g. "ADF" or ""
    if title == "HAADF":
        return "adf"
    # all units indicating we are in real or complex i.e. reciprocal space
    n_reciprocal = sum(1 for unit in units if unit.startswith("1 /"))
    if n_reciprocal == 0:
        # every axis is in real space
        # NOTE(review): an empty unit list also ends up here - confirm intended
        return "imgs"
    if n_reciprocal == len(units):
        # every axis is in reciprocal space
        return "diff"
    return "n/a"

def normalize_imgs_content(self, obj: dict, template: dict) -> dict:
    """Map generic scanned images (e.g. BF/DF) to NeXus.

    Writes provenance of the source file, an NXdata group "image_twod" with
    one axis per entry reported by get_axes_dims and the image intensity
    into template, then advances the "event_img" and "event" counters.

    Parameters
    ----------
    obj : dict
        Object to map; its "metadata", "axes" and "data" entries are read.
    template : dict
        Template that is filled in place.

    Returns
    -------
    dict
        The same template object that was passed in.
    """
    flat_meta = fd.FlatDict(obj["metadata"], "/")
    axes_dims = get_axes_dims(obj["axes"])
    prefix = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
             f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
             f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]"
    process = f"{prefix}/PROCESS[process]"
    nxdata = f"{prefix}/image_twod"

    # where the data came from
    template[f"{process}/source/type"] = "file"
    template[f"{process}/source/path"] = self.file_path
    template[f"{process}/source/checksum"] = self.file_path_sha256
    template[f"{process}/source/algorithm"] = "SHA256"
    template[f"{process}/detector_identifier"] = flat_meta["General/title"]

    # NXdata attributes and one axis dataset per dimension
    template[f"{nxdata}/@NX_class"] = "NXdata"  # TODO::writer should do!
    template[f"{nxdata}/@signal"] = "intensity"
    template[f"{nxdata}/@axes"] = [f"axis_{axis_name}" for axis_name, _ in axes_dims]
    for axis_name, axis_index in axes_dims:
        template[f"{nxdata}/@AXISNAME_indices[axis_{axis_name}]"] = np.uint32(axis_index)
        positions, axis_unit = get_named_axis(obj["axes"], axis_name)
        if positions is None or axis_unit is None:
            continue
        template[f"{nxdata}/axis_{axis_name}"] = {"compress": positions, "strength": 1}
        template[f"{nxdata}/axis_{axis_name}/@long_name"] \
            = f"{axis_name}-axis position ({axis_unit})"

    template[f"{nxdata}/title"] = flat_meta["General/title"]
    template[f"{nxdata}/intensity"] = {"compress": np.asarray(obj["data"]), "strength": 1}
    # TODO::set f"{nxdata}/intensity/@units"
    # TODO::add metadata
    self.id_mgn["event_img"] += 1
    self.id_mgn["event"] += 1
    return template

def normalize_adf_content(self, template: dict) -> dict:
def normalize_adf_content(self, obj: dict, template: dict) -> dict:
    """Map relevant (high-angle) annular dark field images to NeXus.

    Stores the source file provenance, the axes reported by get_axes_dims
    and the image intensity below an IMAGE_R_SET/image_twod group of the
    current event, then increments the "event_img" and "event" counters.

    Parameters
    ----------
    obj : dict
        Object to map; its "metadata", "axes" and "data" entries are read.
    template : dict
        Template that is filled in place.

    Returns
    -------
    dict
        The same template object that was passed in.
    """
    metadata = fd.FlatDict(obj["metadata"], "/")
    named_dims = get_axes_dims(obj["axes"])
    event_no = self.id_mgn["event"]
    image_no = self.id_mgn["event_img"]
    root = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
           f"EVENT_DATA_EM[event_data_em{event_no}]/" \
           f"IMAGE_R_SET[image_r_set{image_no}]"

    # provenance of the parsed file
    template[f"{root}/PROCESS[process]/source/type"] = "file"
    template[f"{root}/PROCESS[process]/source/path"] = self.file_path
    template[f"{root}/PROCESS[process]/source/checksum"] = self.file_path_sha256
    template[f"{root}/PROCESS[process]/source/algorithm"] = "SHA256"
    template[f"{root}/PROCESS[process]/detector_identifier"] = metadata["General/title"]

    # the image itself as an NXdata group
    image = f"{root}/image_twod"
    template[f"{image}/@NX_class"] = "NXdata"  # TODO::writer should do!
    template[f"{image}/@signal"] = "intensity"
    template[f"{image}/@axes"] = [f"axis_{label}" for label, _ in named_dims]
    for label, position_in_array in named_dims:
        template[f"{image}/@AXISNAME_indices[axis_{label}]"] = np.uint32(position_in_array)
        coordinates, unit_name = get_named_axis(obj["axes"], label)
        if coordinates is not None and unit_name is not None:
            template[f"{image}/axis_{label}"] = {"compress": coordinates, "strength": 1}
            template[f"{image}/axis_{label}/@long_name"] \
                = f"{label}-axis position ({unit_name})"
    template[f"{image}/title"] = metadata["General/title"]
    template[f"{image}/intensity"] = {"compress": np.asarray(obj["data"]), "strength": 1}
    # TODO::set f"{image}/intensity/@units"
    # TODO::coll. angles given in original_metadata map to half_angle_interval
    # TODO::add metadata
    self.id_mgn["event_img"] += 1
    self.id_mgn["event"] += 1
    return template

def normalize_diff_content(self, obj: dict, template: dict) -> dict:
    """Map relevant diffraction images to NeXus.

    Writes source file provenance, the axes (renamed from y, x to j, i) and
    the image magnitude below an IMAGE_C_SET/image_twod group of the current
    event, then increments the "event_img" and "event" counters.

    Parameters
    ----------
    obj : dict
        Object to map; its "metadata", "axes" and "data" entries are read.
    template : dict
        Template that is filled in place.

    Returns
    -------
    dict
        The same template object that was passed in.

    Raises
    ------
    ValueError
        If an axis has a name other than "y" or "x".
    """
    # TODO::the above-mentioned constraint is not general enough
    # this can work only for cases where we know that we not only have a
    # Ceta camera but also use it for taking diffraction pattern
    # TODO::this is an example that more logic is needed to identify whether
    # the information inside obj really has a similarity with the concept of
    # somebody having taken a diffraction image
    # one can compare the situation with the following:
    # assume you wish to take pictures of apples and have an NXapple_picture
    # but all you get is an image from a digital camera where the dataset is
    # named maybe DCIM, without a logic one cannot make the mapping robustly!
    # can one map y, x, on j, i indices
    idx_map = {"y": "j", "x": "i"}
    # meta was read further down without ever being assigned, which raised a
    # NameError on every call; build it like the other normalize_* methods do
    meta = fd.FlatDict(obj["metadata"], "/")
    dims = get_axes_dims(obj["axes"])
    print(dims)
    for dim in dims:
        if dim[0] not in idx_map:
            raise ValueError(f"Unable to map index {dim[0]} on something!")

    trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
          f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
          f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]"
    template[f"{trg}/PROCESS[process]/source/type"] = "file"
    template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
    template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
    template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
    template[f"{trg}/PROCESS[process]/detector_identifier"] = meta["General/title"]
    template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::writer should do!
    template[f"{trg}/image_twod/@signal"] = "magnitude"
    template[f"{trg}/image_twod/@axes"] = []
    for dim in dims:
        template[f"{trg}/image_twod/@axes"].append(f"axis_{idx_map[dim[0]]}")
        template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] \
            = np.uint32(dim[1])
        support, unit = get_named_axis(obj["axes"], dim[0])
        # only axes whose unit starts with "1 /" get an axis dataset
        if support is not None and unit is not None and unit.startswith("1 /"):
            template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] \
                = {"compress": support, "strength": 1}
            template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] \
                = f"{idx_map[dim[0]]}-axis position ({unit})"
    template[f"{trg}/image_twod/title"] = meta["General/title"]
    template[f"{trg}/image_twod/magnitude"] \
        = {"compress": np.asarray(obj["data"]), "strength": 1}
    # template[f"{trg}/image_twod/magnitude/@units"]
    # TODO::add metadata
    self.id_mgn["event_img"] += 1
    self.id_mgn["event"] += 1
    return template

def normalize_edxs_content(self, template: dict) -> dict:
def normalize_eds_content(self, obj: dict, template: dict) -> dict:
return template

def normalize_eels_content(self, template: dict) -> dict:
def normalize_eels_content(self, obj: dict, template: dict) -> dict:
return template
68 changes: 68 additions & 0 deletions pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Utility functions to interpret data from hyperspy-project-specific representation."""

import numpy as np


def get_named_axis(axes_metadata, dim_name):
    """Return (positions, units) for the axis named dim_name, or None.

    positions is a float64 numpy array holding offset + i * scale for
    i = 0 .. size - 1, units is the "units" entry of that axis. Entries of
    axes_metadata that are not dictionaries or have no "name" key are
    ignored. If several axes carry the requested name the last one wins.

    Raises
    ------
    ValueError
        If a matching axis lacks one of the keys "index_in_array", "offset",
        "scale", "size", "units", "navigate".
    """
    if len(axes_metadata) == 0:
        return None
    mandatory = ("index_in_array", "offset", "scale", "size", "units", "navigate")  # "name"
    found = None
    for entry in axes_metadata:
        is_match = isinstance(entry, dict) and "name" in entry and entry["name"] == dim_name
        if not is_match:
            continue
        # report the first mandatory key that is absent
        absent = next((key for key in mandatory if key not in entry), None)
        if absent is not None:
            raise ValueError(f"{absent} not in {entry}!")
        steps = np.linspace(0., entry["size"] - 1., num=int(entry["size"]), endpoint=True)
        positions = np.asarray(entry["offset"] + (steps * entry["scale"]), np.float64)
        found = (positions, entry["units"])
    return found


def get_axes_dims(axes_metadata):
    """Return a list of (name, index_in_array) tuples, possibly empty.

    Only entries of axes_metadata that are dictionaries and contain both the
    "name" and the "index_in_array" key contribute; the order of
    axes_metadata is kept.
    """
    if len(axes_metadata) == 0:
        return []
    # TODO::it seems that hyperspy sorts this by index_in_array
    return [
        (entry["name"], entry["index_in_array"])
        for entry in axes_metadata
        if isinstance(entry, dict) and ("name" in entry) and ("index_in_array" in entry)
    ]


def get_axes_units(axes_metadata):
    """Return the "units" value of every axis that has one, possibly empty.

    Entries of axes_metadata that are not dictionaries or carry no "units"
    key are left out; the order of axes_metadata is kept.
    """
    if len(axes_metadata) == 0:
        return []
    # TODO::it seems that hyperspy sorts this by index_in_array
    return [
        entry["units"]
        for entry in axes_metadata
        if isinstance(entry, dict) and "units" in entry
    ]

0 comments on commit 3892c70

Please sign in to comment.