Skip to content

Commit

Permalink
Decode tech partner magic
Browse files Browse the repository at this point in the history
  • Loading branch information
atomprobe-tc committed Dec 8, 2023
1 parent f949e3b commit 00f4556
Show file tree
Hide file tree
Showing 14 changed files with 479 additions and 34 deletions.
0 imgs.batch.bash → imgs.batch.sh
100644 → 100755
File renamed without changes.
236 changes: 236 additions & 0 deletions imgs.dev.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "3889c47f-11c4-4bf3-97de-04fc52f0798d",
"metadata": {},
"outputs": [],
"source": [
"fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/ALN_baoh_021.tif\"\n",
"fnm = \"/home/kaiobach/Research/paper_paper_paper/scidat_nomad_ebsd/bb_analysis/data/production_imgs/FeMoOx_AntiA_04_1k5x_CN.tif\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6da1aea0-545b-446b-a3d1-1574af72f6c6",
"metadata": {},
"outputs": [],
"source": [
"from PIL import Image\n",
"from PIL.TiffTags import TAGS\n",
"# print(TAGS)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1963afb6-6e48-4628-a0e8-d2da0874701e",
"metadata": {},
"outputs": [],
"source": [
"with Image.open(fnm, mode=\"r\") as fp:\n",
" for key in fp.tag_v2:\n",
" if key in [34118, 34119]:\n",
" print(type(fp.tag[key]))\n",
" print(len(fp.tag[key])) \n",
" # print(f\"{key}, {fp.tag[key]}\")\n",
" if key not in TAGS.keys():\n",
" print(f\"--->tag {key}, is not in PIL.TiffTAGS !\")\n",
" # self.tags = {TAGS[key] : fp.tag[key] for key in fp.tag_v2}\n",
" # for key, val in self.tags.items():\n",
" # print(f\"{key}, {val}\")\n",
" nparr = np.array(fp)\n",
" print(f\"{type(nparr)}\")\n",
" print(f\"{nparr.dtype}\")\n",
" print(f\"{np.shape(nparr)}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9ef2a35-a260-4a54-9b83-eae1d588966f",
"metadata": {},
"outputs": [],
"source": [
"with Image.open(fnm, mode=\"r\") as fp:\n",
" czi_keys = [34118, 34119]\n",
" for czi_key in czi_keys:\n",
" if czi_key in fp.tag_v2:\n",
" utf = fp.tag[czi_key]\n",
" print(type(utf))\n",
" if len(utf) == 1:\n",
" print(utf[0])\n",
" exit(1)\n",
" tfs_keys = [34682]\n",
" for tfs_key in tfs_keys:\n",
" if tfs_key in fp.tag_v2:\n",
" utf = fp.tag[tfs_key]\n",
" print(type(utf))\n",
" if len(utf) == 1:\n",
" print(utf[0])"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a8ada062-e308-4288-8f00-b3e620f3c890",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"# https://www.geeksforgeeks.org/python-program-to-sort-a-list-of-tuples-by-second-item/\n",
"def sort_tuple(tup):\n",
" # convert the list of tuples to a numpy array with data type (object, int)\n",
" arr = np.array(tup, dtype=[('col1', object), ('col2', int)])\n",
" # get the indices that would sort the array based on the second column\n",
" indices = np.argsort(arr['col2'])\n",
" # use the resulting indices to sort the array\n",
" sorted_arr = arr[indices]\n",
" # convert the sorted numpy array back to a list of tuples\n",
" sorted_tup = [(row['col1'], row['col2']) for row in sorted_arr]\n",
" return sorted_tup"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d27df293-626c-4d37-80df-96c182d4f401",
"metadata": {},
"outputs": [],
"source": [
"def if_str_represents_float(s):\n",
" try:\n",
" float(s)\n",
" return str(float(s)) == s\n",
" except ValueError:\n",
" return False"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "1a2f0864-f8b3-4d53-bf9d-08a5787c32fb",
"metadata": {},
"outputs": [],
"source": [
"# TFS sections based on IKZ ALN_baoh_021.tif example\n",
"import mmap\n",
"\n",
"tfs_section_names = [\"[User]\",\n",
" \"[System]\",\n",
" \"[Beam]\",\n",
" \"[EBeam]\", \n",
" \"[GIS]\",\n",
" \"[Scan]\",\n",
" \"[EScan]\",\n",
" \"[Stage]\",\n",
" \"[Image]\",\n",
" \"[Vacuum]\",\n",
" \"[Specimen]\",\n",
" \"[Detectors]\",\n",
" \"[T2]\",\n",
" \"[Accessories]\",\n",
" \"[EBeamDeceleration]\",\n",
" \"[CompoundLensFilter]\",\n",
" \"[PrivateFei]\",\n",
" \"[HiResIllumination]\",\n",
" \"[EasyLift]\",\n",
" \"[HotStageMEMS]\",\n",
" \"[HotStage]\",\n",
" \"[HotStageHVHS]\",\n",
" \"[ColdStage]\"]\n",
"\n",
"tfs_section_details = {\"[System]\": [\"Type\", \"Dnumber\", \"Software\", \"BuildNr\", \"Source\", \"Column\", \"FinalLens\", \"Chamber\", \"Stage\", \"Pump\",\n",
" \"ESEM\", \"Aperture\", \"Scan\", \"Acq\", \"EucWD\", \"SystemType\", \"DisplayWidth\", \"DisplayHeight\"]}\n",
"tfs_section_offsets = {}\n",
"\n",
"with open(fnm, 'rb', 0) as file:\n",
" s = mmap.mmap(file.fileno(), 0, access=mmap.ACCESS_READ)\n",
" for section_name in tfs_section_names:\n",
" pos = s.find(bytes(section_name, \"utf8\")) # != -1\n",
" tfs_section_offsets[section_name] = pos\n",
" print(tfs_section_offsets)\n",
"\n",
" # define search offsets\n",
" tpl = []\n",
" for key, value in tfs_section_offsets.items():\n",
" tpl.append((key, value))\n",
" # print(tpl)\n",
" tpl = sort_tuple(tpl)\n",
" print(tpl)\n",
" # if section_name == \"[System]\":\n",
" pos_s = None\n",
" pos_e = None\n",
" for idx in np.arange(0, len(tpl)):\n",
" if tpl[idx][0] != \"[System]\":\n",
" continue\n",
" else:\n",
" pos_s = tpl[idx][1]\n",
"            if idx < len(tpl) - 1:\n",
" pos_e = tpl[idx + 1][1]\n",
" break\n",
" print(f\"Search in between byte offsets {pos_s} and {pos_e}\")\n",
" # fish metadata of e.g. the system section\n",
" section_metadata = {}\n",
" for term in tfs_section_details[\"[System]\"]:\n",
" \n",
" s.seek(pos_s, 0)\n",
" pos = s.find(bytes(term, \"utf8\"))\n",
"        if pos < pos_e: # TODO: guard against pos_e is None and pos == -1 (term not found)\n",
" s.seek(pos, 0)\n",
" section_metadata[f\"{term}\"] = f\"{s.readline().strip().decode('utf8').replace(f'{term}=', '')}\"\n",
" if if_str_represents_float(section_metadata[f\"{term}\"]) is True:\n",
" section_metadata[f\"{term}\"] = np.float64(section_metadata[f\"{term}\"])\n",
" elif section_metadata[f\"{term}\"].isdigit() is True:\n",
" section_metadata[f\"{term}\"] = np.int64(section_metadata[f\"{term}\"])\n",
" else:\n",
" pass\n",
" # print(f\"{term}, {pos}, {pos + len(term) + 1}\")\n",
" # tfs_section_offswr\n",
" # file.seek(pos, 0) #\n",
" print(section_metadata)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2f3eb287-8f55-424c-a016-a07fc59f068a",
"metadata": {},
"outputs": [],
"source": [
"'2'.isdigit()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1341e30-fcce-4a3d-a099-d342b8bbe318",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
16 changes: 9 additions & 7 deletions pynxtools/dataconverter/readers/em/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,11 @@
from typing import Tuple, Any

from pynxtools.dataconverter.readers.base.reader import BaseReader

from pynxtools.dataconverter.readers.em.concepts.nexus_concepts import NxEmAppDef

# from pynxtools.dataconverter.readers.em.subparsers.nxs_mtex import NxEmNxsMTexSubParser

from pynxtools.dataconverter.readers.em.subparsers.nxs_pyxem import NxEmNxsPyxemSubParser

from pynxtools.dataconverter.readers.em.subparsers.nxs_imgs import NxEmImagesSubParser
from pynxtools.dataconverter.readers.em.utils.default_plots import NxEmDefaultPlotResolver

# from pynxtools.dataconverter.readers.em.geometry.convention_mapper import NxEmConventionMapper

# remaining subparsers to be implemented and merged into this one
Expand Down Expand Up @@ -118,13 +114,19 @@ def read(self,
# sub_parser = "nxs_mtex"
# subparser = NxEmNxsMTexSubParser(entry_id, file_paths[0])
# subparser.parse(template)
# TODO::check correct loop through!

# add further with resolving cases
# if file_path is an HDF5 will use hfive parser
# sub_parser = "nxs_pyxem"
subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
# subparser = NxEmNxsPyxemSubParser(entry_id, file_paths[0])
# subparser.parse(template)
# TODO::check correct loop through!

# sub_parser = "image_tiff"
subparser = NxEmImagesSubParser(entry_id, file_paths[0])
subparser.parse(template)
# exit(1)
exit(1)

# for dat_instance in case.dat_parser_type:
# print(f"Process pieces of information in {dat_instance} tech partner file...")
Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_apex.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ def __init__(self, file_path: str = ""):
self.tmp = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
2 changes: 2 additions & 0 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@ def __init__(self, file_path: str = ""):
self.template_attributes: List = []
self.templates: Dict = {}
self.h5r = None
self.is_hdf = True
if file_path is not None and file_path != "":
self.file_path = file_path
# TODO::check if HDF5 file using magic cookie
else:
raise ValueError(f"{__name__} needs proper instantiation !")

Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_bruker.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ def __init__(self, file_path: str = ""):
self.tmp: Dict = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ def __init__(self, file_path: str = ""):
self.path_registry: Dict = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_ebsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ def __init__(self, file_path: str = ""):
self.tmp = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_edax.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,10 @@ def __init__(self, file_path: str = ""):
self.tmp = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_emsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,10 @@ def __init__(self, file_path: str = ""):
self.tmp = {}
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
5 changes: 3 additions & 2 deletions pynxtools/dataconverter/readers/em/subparsers/hfive_oxford.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,10 @@ def __init__(self, file_path: str = ""):
# duplicate the code of the base hfive parser for generating NeXus default plots
self.supported_version: Dict = {}
self.version: Dict = {}
self.init_support()
self.supported = False
self.check_if_supported()
if self.is_hdf is True:
self.init_support()
self.check_if_supported()

def init_support(self):
"""Init supported versions."""
Expand Down
Loading

0 comments on commit 00f4556

Please sign in to comment.