Added eds_map; tested with FHI Rohner emd file

FAIRmat-NFDI · Jan 17, 2024 · 020225d · 020225d
1 parent 3892c70
commit 020225d
Show file tree

Hide file tree

Showing 4 changed files with 133 additions and 63 deletions.
diff --git a/debug/spctrscpy.batch.sh b/debug/spctrscpy.batch.sh
@@ -14,6 +14,7 @@ examples="GeSn_13.nxs"
 examples="CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd"
 examples="CG71113 1138 Ceta 660 mm Camera.emd"
 examples="CG71113 1125 Ceta 1.1 Mx Camera.emd"
+examples="CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd"
 
 for example in "$examples"; do
 	echo $example

diff --git a/debug/spctrscpy.dev.ipynb b/debug/spctrscpy.dev.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "id": "6da1aea0-545b-446b-a3d1-1574af72f6c6",
    "metadata": {},
    "outputs": [],
@@ -13,23 +13,17 @@
     "import h5py\n",
     "from matplotlib import pyplot as plt\n",
     "import xraydb\n",
-    "from ase.data import chemical_symbols"
+    "import flatdict as fd\n",
+    "from ase.data import chemical_symbols\n",
+    "print(chemical_symbols[1::])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "3889c47f-11c4-4bf3-97de-04fc52f0798d",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy/pdi/InGaN_nanowires_spectra.edaxh5\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "src = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/development_spctrscpy\"\n",
     "fnms = [(\"pdi\", \"InGaN_nanowires_spectra.edaxh5\"),\n",
@@ -38,19 +32,22 @@
     "        (\"ikz\", \"GeSn_13.nxs\"),\n",
     "        (\"ikz\", \"VInP_108_L2.h5\"),\n",
     "        (\"fhi\", \"CG71113 1513 HAADF-DF4-DF2-BF 1.2 Mx STEM.emd\"),\n",
+    "        (\"fhi\", \"CG71113 1138 Ceta 660 mm Camera.emd\"),\n",
+    "        (\"fhi\", \"CG71113 1125 Ceta 1.1 Mx Camera.emd\"),\n",
+    "        (\"fhi\", \"CG71113 1412 EDS-HAADF-DF4-DF2-BF 4.8 Mx SI.emd\"),\n",
     "        (\"adrien\", \"1613_Si_HAADF_610_kx.emd\"),\n",
     "        (\"adrien\", \"46_ES-LP_L1_brg.bcf\"),\n",
     "        (\"benedikt\", \"EELS_map_2_ROI_1_location_4.dm3\"),\n",
     "        (\"phillippe\", \"H5OINA_examples_Specimen_1_Map_EDS_+_EBSD_Map_Data_2.h5oina\")]\n",
     "# pyUSID, HSMA\n",
-    "case = 0  # 5 # len(fnms) - 1  # len(fnms) - 1\n",
+    "case = 8  # 5 # len(fnms) - 1  # len(fnms) - 1\n",
     "fnm = f\"{src}/{fnms[case][0]}/{fnms[case][1]}\"\n",
     "print(fnm)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "a4d9ef96-3c70-4c12-80ba-ea4a7d716d47",
    "metadata": {},
    "outputs": [],
@@ -60,44 +57,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "221abf67-0d88-4088-9cc7-e0d9b85c4699",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/x-hdf5": "/home/kaiobach/Research/hu_hu_hu/sprint17/pynx/pynxtools/debug/spctrscpy/debug.InGaN_nanowires_spectra.edaxh5.nxs",
-      "text/plain": [
-       "<jupyterlab_h5web.widget.H5Web object>"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
    "source": [
     "# resulting NeXus artifact\n",
-    "H5Web(f\"spctrscpy/debug.{fnms[case][1]}.nxs\")"
+    "# tmp = \"debug.CG71113 1138 Ceta 660 mm Camera.emd.nxs\"\n",
+    "# print(tmp)\n",
+    "# H5Web(tmp)\n",
+    "H5Web(f\"debug.{fnms[case][1]}.nxs\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
    "id": "75b32c8f-8efa-4b40-bfc8-6f95300902ea",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "objs = emd.file_reader(fnm)\n",
     "print(len(objs))\n",
-    "for obj in objs:\n",
+    "for idx, obj in enumerate(objs):\n",
     "    if not isinstance(obj, dict):\n",
     "        raise ValueError(\"No dict!\")\n",
     "    print(obj.keys())\n",
-    "    for key, val in obj.items():\n",
-    "        print(f\"{key}, {np.shape(val)}\")\n",
-    "    print(obj[\"metadata\"])\n",
-    "    # print(obj[\"original_metadata\"])\n",
+    "    meta = fd.FlatDict(obj[\"metadata\"], \"/\")\n",
+    "    if meta[\"General/title\"] == \"Te\":\n",
+    "        for key, val in obj.items():\n",
+    "            print(f\"{idx}, {key}, {np.shape(val)}\")\n",
+    "        print(f\"\\n\\n{obj['axes']}\")\n",
+    "        print(\"\\n\\n\")\n",
+    "        for key, val in meta.items():\n",
+    "            print(f\"{key}: {val}\")\n",
+    "        orgmeta = fd.FlatDict(obj[\"original_metadata\"], \"/\")\n",
+    "        print(\"\\n\\n\")\n",
+    "        for key, val in orgmeta.items():\n",
+    "            print(f\"{key}: {val}\")\n",
     "# print(f\"{type(objs[0])}\")\n",
     "# print(objs[0].keys())"
    ]

diff --git a/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py b/pynxtools/dataconverter/readers/em/subparsers/rsciio_velox.py
@@ -22,13 +22,17 @@
 
 from typing import Dict, List
 from rsciio import emd
+from ase.data import chemical_symbols
 
 from pynxtools.dataconverter.readers.em.subparsers.rsciio_base import RsciioBaseParser
 from pynxtools.dataconverter.readers.em.utils.rsciio_hyperspy_utils \
     import get_named_axis, get_axes_dims, get_axes_units
 from pynxtools.dataconverter.readers.shared.shared_utils \
     import get_sha256_of_file_content
 
+REAL_SPACE = 0
+COMPLEX_SPACE = 1
+
 
 class RsciioVeloxSubParser(RsciioBaseParser):
     """Read Velox EMD File Format emd."""
@@ -41,7 +45,8 @@ def __init__(self, entry_id: int = 1, file_path: str = ""):
         self.id_mgn: Dict = {"event": 1,
                              "event_img": 1,
                              "event_spc": 1,
-                             "roi": 1}
+                             "roi": 1,
+                             "eds_img": 1}
         self.file_path_sha256 = None
         self.tmp: Dict = {}
         self.supported_version: Dict = {}
@@ -84,11 +89,12 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict:
             for req in reqs:
                 if req not in obj:
                     parse = False
-            if parse == False:
+            if parse is False:
                 continue
 
             content_type = self.content_resolver(obj)
             print(f"Parsing {idx}-th object in {self.file_path} content type is {content_type}")
+            print(f"dims: {obj['axes']}")
             if content_type == "imgs":
                 self.normalize_imgs_content(obj, template)  # generic imaging modes
                 # TODO:: could later make an own one for bright/dark field, but
@@ -97,8 +103,8 @@ def tech_partner_to_nexus_normalization(self, template: dict) -> dict:
                 self.normalize_adf_content(obj, template)  # (high-angle) annular dark field
             elif content_type == "diff":  # diffraction image in reciprocal space
                 self.normalize_diff_content(obj, template)  # diffraction images
-            elif content_type == "eds":
-                self.normalize_eds_content(obj,template)  # ED(X)S in the TEM
+            elif content_type == "eds_map":
+                self.normalize_eds_map_content(obj, template)  # ED(X)S in the TEM
             elif content_type == "eels":
                 self.normalize_eels_content(obj, template)  # electron energy loss spectroscopy
             else:  # == "n/a"
@@ -111,12 +117,17 @@ def content_resolver(self, obj: dict) -> str:
         # i.e. a dictionary with the following keys:
         # "data", "axes", "metadata", "original_metadata", "mapping"
         meta = fd.FlatDict(obj["metadata"], "/")
-        orgmeta = fd.FlatDict(obj["original_metadata"], "/")
+        # orgmeta = fd.FlatDict(obj["original_metadata"], "/")
         dims = get_axes_dims(obj["axes"])
         units = get_axes_units(obj["axes"])
+
         if "General/title" not in meta.keys():
             return "n/a"
+
         if (meta["General/title"] in ("BF")) or (meta["General/title"].startswith("DF")):
+            uniq = set()
+            for dim in dims:
+                uniq.add(dim[0])
             # TODO::the problem with using here the explicit name DF4 is that this may only
             # work for a particular microscope:
             # Core/MetadataDefinitionVersion: 7.9, Core/MetadataSchemaVersion: v1/2013/07
@@ -128,27 +139,45 @@ def content_resolver(self, obj: dict) -> str:
             # all that logic is unneeded and thereby the data more interoperable
             # if FEI would harmonize their obvious company metadata standard with the
             # electron microscopy community!
-            return "imgs"
+            if sorted(uniq) == ["x", "y"]:
+                return "imgs"
+
         if meta["General/title"] in ("HAADF"):
             return "adf"
+
         # all units indicating we are in real or complex i.e. reciprocal space
+        if meta["General/title"] in ("EDS"):
+            return "eds_spc"
+
+        for symbol in chemical_symbols[1::]:  # an eds_map
+            # TODO::does rosettasciio via hyperspy identify the symbol or is the
+            # title by default already in Velox set (by default) to the chemical symbol?
+            if meta["General/title"] != symbol:
+                continue
+            return "eds_map"
+
         vote_r_c = [0, 0]  # real space, complex space
         for unit in units:
-            if unit.startswith("1 /"):
-                vote_r_c[1] += 1
-            else:
-                vote_r_c[0] += 1
-        if vote_r_c[0] == len(units) and vote_r_c[1] == 0:
+            if unit.lower().replace(" ", "") \
+                    in ["m", "cm", "mm", "µm", "nm", "pm"]:
+                vote_r_c[REAL_SPACE] += 1
+            if unit.lower().replace(" ", "") \
+                    in ["1/m", "1/cm", "1/mm", "1/µm", "1/nm", "1/pm"]:
+                vote_r_c[COMPLEX_SPACE] += 1
+
+        if (vote_r_c[0] == len(units)) and (vote_r_c[1] == 0):
             return "imgs"
-        if vote_r_c[0] == 0 and vote_r_c[1] == len(units):
+        if (vote_r_c[0] == 0) and (vote_r_c[1] == len(units)):
             return "diff"
-        del vote_r_c
+
         return "n/a"
 
     def normalize_imgs_content(self, obj: dict, template: dict) -> dict:
         """Map generic scanned images (e.g. BF/DF) to NeXus."""
         meta = fd.FlatDict(obj["metadata"], "/")
         dims = get_axes_dims(obj["axes"])
+        if len(dims) != 2:
+            raise ValueError(f"{obj['axes']}")
         trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
               f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
               f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]"
@@ -183,6 +212,8 @@ def normalize_adf_content(self, obj: dict, template: dict) -> dict:
         """Map relevant (high-angle) annular dark field images to NeXus."""
         meta = fd.FlatDict(obj["metadata"], "/")
         dims = get_axes_dims(obj["axes"])
+        if len(dims) != 2:
+            raise ValueError(f"{obj['axes']}")
         trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
               f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
               f"IMAGE_R_SET[image_r_set{self.id_mgn['event_img']}]"
@@ -228,15 +259,17 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict:
         # named maybe DCIM, without a logic one cannot make the mapping robustly!
         # can one map y, x, on j, i indices
         idx_map = {"y": "j", "x": "i"}
+        meta = fd.FlatDict(obj["metadata"], "/")
         dims = get_axes_dims(obj["axes"])
-        print(dims)
+        if len(dims) != 2:
+            raise ValueError(f"{obj['axes']}")
         for dim in dims:
             if dim[0] not in idx_map.keys():
                 raise ValueError(f"Unable to map index {dim[0]} on something!")
 
         trg = f"/ENTRY[entry{self.entry_id}]/measurement/event_data_em_set/" \
-                f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
-                f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]"
+              f"EVENT_DATA_EM[event_data_em{self.id_mgn['event']}]/" \
+              f"IMAGE_C_SET[image_c_set{self.id_mgn['event_img']}]"
         template[f"{trg}/PROCESS[process]/source/type"] = "file"
         template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
         template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
@@ -250,7 +283,7 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict:
             template[f"{trg}/image_twod/@AXISNAME_indices[axis_{idx_map[dim[0]]}]"] \
                 = np.uint32(dim[1])
             support, unit = get_named_axis(obj["axes"], dim[0])
-            if support is not None and unit is not None and unit.startswith("1 /"):
+            if support is not None and unit is not None:
                 template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}"] \
                     = {"compress": support, "strength": 1}
                 template[f"{trg}/image_twod/axis_{idx_map[dim[0]]}/@long_name"] \
@@ -264,7 +297,45 @@ def normalize_diff_content(self, obj: dict, template: dict) -> dict:
         self.id_mgn["event"] += 1
         return template
 
-    def normalize_eds_content(self, obj: dict, template: dict) -> dict:
+    def normalize_eds_map_content(self, obj: dict, template: dict) -> dict:
+        """Map relevant EDS map to NeXus."""
+        meta = fd.FlatDict(obj["metadata"], "/")  # nested metadata flattened to "/"-joined keys
+        dims = get_axes_dims(obj["axes"])  # presumably (axis name, axis index) pairs - verify
+        if len(dims) != 2:  # only objects with exactly two axes are accepted
+            raise ValueError(f"{obj['axes']}")
+        trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing"
+        template[f"{trg}/source"] = meta["General/title"]  # object title is written as source
+        trg = f"/ENTRY[entry{self.entry_id}]/ROI[roi{self.id_mgn['roi']}]/eds/indexing/" \
+              f"IMAGE_R_SET[image_r_set{self.id_mgn['eds_img']}]"  # trg now: the image set
+        template[f"{trg}/PROCESS[process]/source/type"] = "file"
+        template[f"{trg}/PROCESS[process]/source/path"] = self.file_path
+        template[f"{trg}/PROCESS[process]/source/checksum"] = self.file_path_sha256
+        template[f"{trg}/PROCESS[process]/source/algorithm"] = "SHA256"
+        template[f"{trg}/PROCESS[process]/detector_identifier"] \
+            = f"Check carefully how rsciio/hyperspy knows this {meta['General/title']}!"
+        # template[f"{trg}/description"] = ""
+        # template[f"{trg}/energy_range"] = (0., 0.)
+        # template[f"{trg}/energy_range/@units"] = "keV"
+        # template[f"{trg}/iupac_line_candidates"] = ""
+        template[f"{trg}/image_twod/@NX_class"] = "NXdata"  # TODO::should be autodecorated
+        template[f"{trg}/image_twod/@signal"] = "intensity"
+        template[f"{trg}/image_twod/@axes"] = []  # axis names are appended in the loop below
+        for dim in dims:
+            template[f"{trg}/image_twod/@axes"].append(f"axis_{dim[0]}")
+            template[f"{trg}/image_twod/@AXISNAME_indices[axis_{dim[0]}_indices]"] \
+                = np.uint32(dim[1])  # dim[1]: presumably the array index of this axis - verify
+            support, unit = get_named_axis(obj["axes"], dim[0])  # axis values and their unit
+            if support is not None and unit is not None:  # write the axis only if both exist
+                template[f"{trg}/image_twod/AXISNAME[axis_{dim[0]}]"] \
+                    = {"compress": support, "strength": 1}
+                template[f"{trg}/image_twod/axis_{dim[0]}/@long_name"] \
+                    = f"{dim[0]}-axis position ({unit})"
+        template[f"{trg}/title"] = f"EDS map {meta['General/title']}"
+        template[f"{trg}/image_twod/intensity"] \
+            = {"compress": np.asarray(obj["data"]), "strength": 1}
+        # template[f"{trg}/image_twod/intensity/@long_name"] = f"Signal"
+        self.id_mgn["eds_img"] += 1  # the next map gets the next image_r_set id
+        self.id_mgn["roi"] += 1  # TODO::does not necessarily have to be incremented!
         return template
 
     def normalize_eels_content(self, obj: dict, template: dict) -> dict:

diff --git a/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py b/pynxtools/dataconverter/readers/em/utils/rsciio_hyperspy_utils.py
@@ -32,15 +32,13 @@ def get_named_axis(axes_metadata, dim_name):
                         for req in reqs:
                             if req not in axis:
                                 raise ValueError(f"{req} not in {axis}!")
-                        retval = (
-                            np.asarray(
-                                axis["offset"] + (np.linspace(0.,
-                                                              axis["size"] - 1.,
-                                                              num=int(axis["size"]),
-                                                              endpoint=True)
-                                                  * axis["scale"]),
-                                       np.float64),
-                                  axis["units"])
+                        retval = (np.asarray(axis["offset"]
+                                             + (np.linspace(0.,
+                                                            axis["size"] - 1.,
+                                                            num=int(axis["size"]),
+                                                            endpoint=True)
+                                             * axis["scale"]),
+                                             np.float64), axis["units"])
     return retval