Skip to content

Commit

Permalink
Fixed error in floating point value interpretation logic, added writing of NXimage_r_set instance based on TIFF content
Browse files Browse the repository at this point in the history
  • Loading branch information
atomprobe-tc committed Dec 11, 2023
1 parent df72956 commit 345d8e1
Show file tree
Hide file tree
Showing 4 changed files with 114 additions and 41 deletions.
19 changes: 10 additions & 9 deletions pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ def read(self,
# sub_parser = "image_tiff"
subparser = NxEmImagesSubParser(entry_id, file_paths[0])
subparser.parse(template)
exit(1)


# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
# continue
Expand All @@ -147,13 +146,15 @@ def read(self,
# we only need to decorate the template to point to the mandatory ROI overview
# print("Create NeXus default plottable data...")
# em_default_plot_generator(template, 1)
nxs_plt = NxEmDefaultPlotResolver()
# if nxs_mtex is the sub-parser
resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot(
entry_id, file_paths[0])
# print(f"DEFAULT PLOT IS {resolved_path}")
if resolved_path != "":
nxs_plt.annotate_default_plot(template, resolved_path)

if True is False:
nxs_plt = NxEmDefaultPlotResolver()
# if nxs_mtex is the sub-parser
resolved_path = nxs_plt.nxs_mtex_get_nxpath_to_default_plot(
entry_id, file_paths[0])
# print(f"DEFAULT PLOT IS {resolved_path}")
if resolved_path != "":
nxs_plt.annotate_default_plot(template, resolved_path)

debugging = True
if debugging is True:
Expand Down
109 changes: 96 additions & 13 deletions pynxtools/dataconverter/readers/em/subparsers/image_tiff_tfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,16 +31,18 @@


class TfsTiffSubParser(TiffSubParser):
    """Subparser for ThermoFisher/FEI-specific TIFF files (metadata and image payload)."""

    def __init__(self, file_path: str = "", entry_id: int = 1):
        """Initialize caches.

        Args:
            file_path: Path to the TIFF file to parse.
            entry_id: Index of the /ENTRY[entry<id>] instance this parser
                writes into (used when building template paths).
        """
        super().__init__(file_path)
        self.entry_id = entry_id
        self.event_id = 1  # running id of the event_data_em instance written to
        self.prfx = None
        # "data" caches the heavy pixel payload, "meta" the flattened
        # TFS/FEI key/value metadata fished from the TIFF tags
        self.tmp: Dict = {"data": None, "meta": {}}
        self.supported_version: Dict = {}
        self.version: Dict = {}
        self.tags: Dict = {}
        self.supported = False
        self.check_if_tiff()
        # NOTE(review): self.tfs appears unused now that metadata lives in
        # self.tmp["meta"]; kept for backward compatibility — candidate for removal
        self.tfs: Dict = {}

def check_if_tiff_tfs(self):
"""Check if resource behind self.file_path is a TaggedImageFormat file."""
Expand Down Expand Up @@ -101,10 +103,11 @@ def get_metadata(self):
pos_e = sequence[idx + 1][1]
else:
pos_e = np.iinfo(np.uint64).max
# TODO::better use official convention to not read beyond the end of file
idx += 1
if pos_s is None or pos_e is None:
raise ValueError(f"Definition of byte boundaries for reading childs of [{parent}] was unsuccessful !")
print(f"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}")
# print(f"Search for [{parent}] in between byte offsets {pos_s} and {pos_e}")

# fish metadata of e.g. the system section
for term in get_fei_childs(parent):
Expand All @@ -113,21 +116,20 @@ def get_metadata(self):
if pos < pos_e: # check if pos_e is None
s.seek(pos, 0)
value = f"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}"
self.tfs[f"{parent}/{term}"] = None
self.tmp["meta"][f"{parent}/{term}"] = None
if isinstance(value, str):
if value != "":
if if_str_represents_float(value) is True:
self.tfs[f"{parent}/{term}"] = np.float64(value)
elif value.isdigit() is True:
self.tfs[f"{parent}/{term}"] = np.int64(value)
# execution order of the check here matters!
if value.isdigit() is True:
self.tmp["meta"][f"{parent}/{term}"] = np.int64(value)
elif if_str_represents_float(value) is True:
self.tmp["meta"][f"{parent}/{term}"] = np.float64(value)
else:
self.tfs[f"{parent}/{term}"] = value
self.tmp["meta"][f"{parent}/{term}"] = value
else:
print(f"{parent}/{term} ---> {type(value)}")
raise ValueError(f"Detected an unexpected case {parent}/{term}, type: {type(value)} !")
else:
pass
for key, val in self.tfs.items():
print(f"{key}, {val}")

def parse_and_normalize(self):
"""Perform actual parsing filling cache self.tmp."""
Expand All @@ -137,3 +139,84 @@ def parse_and_normalize(self):
else:
print(f"{self.file_path} is not a ThermoFisher-specific "
f"TIFF file that this parser can process !")

def process_into_template(self, template: dict) -> dict:
    """Run the metadata and data processing steps, then hand back the template."""
    for processing_step in (self.process_event_data_em_metadata,
                            self.process_event_data_em_data):
        processing_step(template)
    return template

def process_event_data_em_metadata(self, template: dict) -> dict:
    """Add respective event_data_em header.

    Placeholder for contextualizing how the image relates to the EM
    session; currently only reports that this mapping step was reached
    and returns the template unchanged.
    """
    print(f"Mapping some of the TFS/FEI metadata concepts onto NeXus concepts")
    return template

def process_event_data_em_data(self, template: dict) -> dict:
    """Add respective heavy image data.

    Decodes the TIFF pixel payload and writes it as the default-plottable
    NXdata group of an image_r_set instance, with axes scaled by the
    TFS/FEI-reported pixel sizes.

    Args:
        template: Mutable template dict the keys are written into.

    Returns:
        The (mutated) template.
    """
    print(f"Writing TFS/FEI TIFF image data onto the respective NeXus concept")
    # Decode the pixel payload exactly once while the file handle is open.
    # PIL loads lazily, so re-reading np.array(fp) after the with-block (as
    # the previous revision did) risks operating on a closed handle; all
    # later uses now work on this materialized array.
    with Image.open(self.file_path, mode="r") as fp:
        nparr = np.array(fp)
    # print(f"type: {type(nparr)}, dtype: {nparr.dtype}, shape: {np.shape(nparr)}")
    # TODO::discussion points
    # - how do you know we have an image of real space vs. imaginary space (from the metadata?)
    # - how to deal with the (ugly) scale bar that is typically stamped into the TIFF image content?
    #   with H5Web and NeXus most of this is obsolete unless there are metadata stamped which are
    #   not available in NeXus or in the respective metadata section of the TIFF image;
    #   H5Web images can be scaled based on the metadata allowing basically the same
    #   explorative viewing as traditional image viewers
    image_identifier = 1
    trg = f"/ENTRY[entry{self.entry_id}]/measurement/EVENT_DATA_EM_SET[event_data_em_set]/" \
          f"EVENT_DATA_EM[event_data_em{self.event_id}]/" \
          f"IMAGE_R_SET[image_r_set{image_identifier}]/DATA[image]"
    # TODO::writer should decorate automatically!
    template[f"{trg}/title"] = f"Image"
    template[f"{trg}/@NX_class"] = f"NXdata"  # TODO::writer should decorate automatically!
    template[f"{trg}/@signal"] = "intensity"
    dims = ["x", "y"]
    for idx, dim in enumerate(dims):
        template[f"{trg}/@AXISNAME_indices[axis_{dim}_indices]"] = np.uint32(idx)
    # axes are listed slowest-to-fastest: 0 is y while 1 is x for 2d,
    # 0 is z, 1 is y, while 2 is x for 3d
    template[f"{trg}/@axes"] = [f"axis_{dim}" for dim in dims[::-1]]
    template[f"{trg}/intensity"] = {"compress": nparr, "strength": 1}
    template[f"{trg}/intensity/@long_name"] = f"Signal"

    # assumes FEI reports pixel sizes in SI units (m) -- TODO confirm
    sxy = {"x": self.tmp["meta"]["EScan/PixelWidth"],
           "y": self.tmp["meta"]["EScan/PixelHeight"]}
    shp = np.shape(nparr)
    nxy = {"x": shp[1], "y": shp[0]}
    scan_unit = {"x": "m", "y": "m"}
    # TODO::be careful, we assume here a very specific coordinate system;
    # the TIFF file gives no clue, TIFF just documents in which order it
    # arranges a bunch of pixels streamed into an n-d tiling (e.g. a 2D image).
    # TFS gives an image without information about its location on the
    # physical sample surface, therefore we can only scale pixel positions
    # by the physical pixel sizes s_x, s_y. The image dimensions are fished
    # with the image-reading library instead of from TFS metadata for
    # consistency checks — TIFF already has proper tags for width and
    # height of an image in number of pixels.
    for dim in dims:
        template[f"{trg}/AXISNAME[axis_{dim}]"] \
            = {"compress": np.asarray(np.linspace(0,
                                                  nxy[dim] - 1,
                                                  num=nxy[dim],
                                                  endpoint=True) * sxy[dim], np.float64), "strength": 1}
        template[f"{trg}/AXISNAME[axis_{dim}]/@long_name"] \
            = f"Coordinate along {dim}-axis ({scan_unit[dim]})"
        template[f"{trg}/AXISNAME[axis_{dim}]/@units"] = f"{scan_unit[dim]}"
    return template

def process_event_data_em_state(self, template: dict) -> dict:
    """Add em-state as they were during the event_data_em event.

    Placeholder for the dynamic microscope state (as opposed to the
    static/long-valid microscope metadata); currently only reports that
    this step was reached and returns the template unchanged.
    """
    print(f"Writing TFS/FEI event_data_em state")
    return template
24 changes: 7 additions & 17 deletions pynxtools/dataconverter/readers/em/subparsers/nxs_imgs.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@

import numpy as np
# from typing import Dict, Any, List
from PIL import Image

from pynxtools.dataconverter.readers.em.subparsers.image_tiff_tfs import TfsTiffSubParser
from pynxtools.dataconverter.readers.em.utils.hfive_web_utils import hfive_web_decorate_nxdata


class NxEmImagesSubParser:
Expand Down Expand Up @@ -53,22 +55,10 @@ def parse(self, template: dict) -> dict:
# and its interaction with tech-partner-specific hfive_* subparsers

if image_parser_type == "tiff_tfs":
tiff = TfsTiffSubParser(self.file_path)
tiff = TfsTiffSubParser(self.file_path, self.entry_id)
tiff.parse_and_normalize()
self.process_into_template(tiff.tmp, template)
else: # none or something unsupported
return template
return template

def process_into_template(self, inp: dict, template: dict) -> dict:
debugging = False
if debugging is True:
for key, val in inp.items():
if isinstance(val, dict):
for ckey, cval in val.items():
print(f"{ckey}, {cval}")
else:
print(f"{key}, {val}")
# TODO:: implement actual mapping on template
# self.process_roi_overview(inp, template)
tiff.process_into_template(template)
# else:
# TODO::add here specific content parsers for other tech partner
# or other custom parsing of images
return template
3 changes: 1 addition & 2 deletions pynxtools/dataconverter/readers/em/utils/image_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ def sort_ascendingly_by_second_argument(tup):

def if_str_represents_float(s):
    """Return True if ``s`` is the exact string form of a Python float.

    Strict round-trip check: "1.5" -> True, but "1" -> False (because
    str(float("1")) is "1.0"), so integer-looking strings can be detected
    separately with str.isdigit() before calling this — the caller in
    image_tiff_tfs.get_metadata relies on that ordering.

    Non-numeric strings and non-string inputs (e.g. None, lists) return
    False instead of raising: float() raises TypeError, not ValueError,
    for non-string/non-number arguments, so both are caught.
    """
    try:
        # equality only holds when s is the canonical repr of the float
        return str(float(s)) == s
    except (TypeError, ValueError):
        return False

0 comments on commit 345d8e1

Please sign in to comment.