Start refactoring the apm reader; add support for ATO, ENV, FAU/Erlangen's pyccapt calibration and ranging, and FAU/Erlangen's Matlab Atom Probe Toolbox fig ranging definitions

ELN and configuration parsing, including the initial io_case check of the apm reader, are currently deactivated: these functionalities need the updated and refactored ifes_apt_tc_data_modeling library, which as of 2023/12/31 has not yet been released on PyPI (it is, however, committed into its repository). This commit therefore works with a local Python 3.11.5 environment whose ifes_apt_tc_data_modeling site-package was modified manually, to avoid frequently publishing dysfunctional ifes versions before testing them here first.

Next steps:
i) run the reader against all 150+ test cases,
ii) fix the bugs these examples reveal,
iii) move the changes into the ifes library and publish it on PyPI,
iv) update pyproject.toml,
v) reactivate ELN and config parsing,
vi) update the apm definitions to use the refactored apm base classes,
vii) integrate the ruff changes,
viii) commit on pynxtools master
atomprobe-tc committed Dec 31, 2023
1 parent 5ea8824 commit 3a0cb9d
Showing 7 changed files with 266 additions and 183 deletions.
13 changes: 3 additions & 10 deletions pynxtools/dataconverter/readers/apm/reader.py
@@ -17,30 +17,23 @@
 #
 """Generic parser for loading atom probe microscopy data into NXapm."""

-# pylint: disable=no-member
+# pylint: disable=no-member,too-few-public-methods

 from typing import Tuple, Any

 from pynxtools.dataconverter.readers.base.reader import BaseReader
-
 from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases \
     import ApmUseCaseSelector
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_deployment_specifics \
     import NxApmNomadOasisConfigurationParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_generic_eln \
     import NxApmNomadOasisElnSchemaParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_reconstruction \
     import ApmReconstructionParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_ranging \
     import ApmRangingDefinitionsParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_create_nx_default_plots \
     import apm_default_plot_generator
-
 from pynxtools.dataconverter.readers.apm.utils.apm_generate_synthetic_data \
     import ApmCreateExampleData

@@ -69,8 +62,6 @@ class ApmReader(BaseReader):
"""

# pylint: disable=too-few-public-methods

# Whitelist for the NXDLs that the reader supports and can process
supported_nxdls = ["NXapm"]

@@ -98,6 +89,7 @@ def read(self,
         assert case.is_valid is True, \
             "Such a combination of input-file(s, if any) is not supported !"

+        """
         print("Parse (meta)data coming from an ELN...")
         if len(case.eln) == 1:
             nx_apm_eln = NxApmNomadOasisElnSchemaParser(case.eln[0], entry_id)
@@ -111,6 +103,7 @@
             nx_apm_cfg = NxApmNomadOasisConfigurationParser(case.cfg[0], entry_id)
             nx_apm_cfg.report(template)
             # having and/or using a deployment-specific configuration is optional
+        """

         print("Parse (numerical) data and metadata from ranging definitions file...")
         if len(case.reconstruction) == 1:
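As a usage sketch (not part of the commit; the file names are hypothetical), the combination gate that read() asserts on can be exercised directly through the refactored selector from the next file:

from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases import ApmUseCaseSelector

# hypothetical inputs: exactly one reconstruction (.apt) and one ranging (.rrng) file,
# the only combination currently accepted while ELN/config input is deactivated
selector = ApmUseCaseSelector(("Si.apt", "Si.rrng"))
assert selector.is_valid
print(selector.reconstruction)  # ['Si.apt']
print(selector.ranging)  # ['Si.rrng']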
67 changes: 39 additions & 28 deletions pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py
@@ -20,6 +20,9 @@
 # pylint: disable=no-member,duplicate-code

 from typing import Tuple, Dict, List
+VALID_FILE_NAME_SUFFIX_RECON = [".apt", ".pos", ".epos", ".ato", ".csv", ".h5"]
+VALID_FILE_NAME_SUFFIX_RANGE = [".rng", ".rrng", ".env", ".fig.txt", "range_.h5"]
+VALID_FILE_NAME_SUFFIX_CONFIG = [".yaml", ".yml"]


class ApmUseCaseSelector: # pylint: disable=too-few-public-methods
@@ -41,50 +44,58 @@ def __init__(self, file_paths: Tuple[str] = None):
         self.reconstruction: List[str] = []
         self.ranging: List[str] = []
         self.is_valid = False
-        self.supported_mime_types = [
-            "pos", "epos", "apt", "rrng", "rng", "txt", "yaml", "yml"]
-        for mime_type in self.supported_mime_types:
-            self.case[mime_type] = []
-
-        self.sort_files_by_mime_type(file_paths)
+        self.supported_file_name_suffixes = VALID_FILE_NAME_SUFFIX_RECON \
+            + VALID_FILE_NAME_SUFFIX_RANGE + VALID_FILE_NAME_SUFFIX_CONFIG
+        print(f"self.supported_file_name_suffixes: {self.supported_file_name_suffixes}")
+        self.sort_files_by_file_name_suffix(file_paths)
         self.check_validity_of_file_combinations()

-    def sort_files_by_mime_type(self, file_paths: Tuple[str] = None):
-        """Sort all input-files based on their mimetype to prepare validity check."""
-        for file_name in file_paths:
-            index = file_name.lower().rfind(".")
-            if index >= 0:
-                suffix = file_name.lower()[index + 1::]
-                if suffix in self.supported_mime_types:
-                    if file_name not in self.case[suffix]:
-                        self.case[suffix].append(file_name)
+    def sort_files_by_file_name_suffix(self, file_paths: Tuple[str] = None):
+        """Sort all input-files based on their name suffix to prepare validity check."""
+        for suffix in self.supported_file_name_suffixes:
+            self.case[suffix] = []
+        for fpath in file_paths:
+            for suffix in self.supported_file_name_suffixes:
+                if suffix not in [".h5", "range_.h5"]:
+                    if (fpath.lower().endswith(suffix)) and (fpath not in self.case[suffix]):
+                        self.case[suffix].append(fpath)
+                else:
+                    if fpath.lower().endswith("range_.h5") is True:
+                        self.case["range_.h5"].append(fpath)
+                    elif fpath.lower().endswith(".h5") is True:
+                        self.case[".h5"].append(fpath)
+                    else:
+                        continue
+        # HDF5 files need special treatment; this already shows that magic numbers
+        # or signatures should better have been used, to avoid content checks as
+        # complicated as those we had to implement e.g. for the em reader

     def check_validity_of_file_combinations(self):
         """Check if this combination of types of files is supported."""
-        recon_input = 0  # reconstruction relevant file e.g. POS, ePOS, APT
-        range_input = 0  # ranging definition file, e.g. RNG, RRNG
+        recon_input = 0  # reconstruction relevant file e.g. POS, ePOS, APT, ATO, CSV
+        range_input = 0  # ranging definition file, e.g. RNG, RRNG, ENV, FIG.TXT
         other_input = 0  # generic ELN or OASIS-specific configurations
-        for mime_type, value in self.case.items():
-            if mime_type in ["pos", "epos", "apt"]:
+        for suffix, value in self.case.items():
+            if suffix in VALID_FILE_NAME_SUFFIX_RECON:
                 recon_input += len(value)
-            elif mime_type in ["rrng", "rng", "txt"]:
+            elif suffix in VALID_FILE_NAME_SUFFIX_RANGE:
                 range_input += len(value)
-            elif mime_type in ["yaml", "yml"]:
+            elif suffix in VALID_FILE_NAME_SUFFIX_CONFIG:
                 other_input += len(value)
             else:
                 continue

-        if (recon_input == 1) and (range_input == 1) and (1 <= other_input <= 2):
+        if (recon_input == 1) and (range_input == 1):  # and (1 <= other_input <= 2):
             self.is_valid = True
             self.reconstruction: List[str] = []
             self.ranging: List[str] = []
-            for mime_type in ["pos", "epos", "apt"]:
-                self.reconstruction += self.case[mime_type]
-            for mime_type in ["rrng", "rng", "txt"]:
-                self.ranging += self.case[mime_type]
+            for suffix in VALID_FILE_NAME_SUFFIX_RECON:
+                self.reconstruction += self.case[suffix]
+            for suffix in VALID_FILE_NAME_SUFFIX_RANGE:
+                self.ranging += self.case[suffix]
             yml: List[str] = []
-            for mime_type in ["yaml", "yml"]:
-                yml += self.case[mime_type]
+            for suffix in VALID_FILE_NAME_SUFFIX_CONFIG:
+                yml += self.case[suffix]
             for entry in yml:
                 if entry.endswith(".oasis.specific.yaml") \
                         or entry.endswith(".oasis.specific.yml"):
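The trailing comment in sort_files_by_file_name_suffix alludes to magic numbers; for illustration (not part of this commit), HDF5 files can be recognized by content rather than suffix, since an HDF5 file starts with the 8-byte superblock signature \x89HDF\r\n\x1a\n:

def has_hdf5_signature(file_path: str) -> bool:
    # typical HDF5 files carry the superblock signature at offset 0
    # (the format also allows offsets 512, 1024, 2048, ...)
    with open(file_path, "rb") as fp:
        return fp.read(8) == b"\x89HDF\r\n\x1a\n"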
pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py
@@ -20,30 +20,28 @@
 # pylint: disable=no-member

 import flatdict as fd
-
 import yaml

 from pynxtools.dataconverter.readers.apm.map_concepts.apm_deployment_specifics_to_nx_map \
     import NxApmDeploymentSpecificInput
-
 from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \
     import apply_modifier, variadic_path_to_specific_path


 class NxApmNomadOasisConfigurationParser:  # pylint: disable=too-few-public-methods
     """Parse deployment specific configuration."""

-    def __init__(self, file_name: str, entry_id: int):
-        print(f"Extracting data from deployment specific configuration file: {file_name}")
-        if (file_name.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml")
-                or file_name.endswith(".oasis.specific.yml")) and entry_id > 0:
+    def __init__(self, file_path: str, entry_id: int):
+        print(f"Extracting data from deployment specific configuration file: {file_path}")
+        if (file_path.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml")
+                or file_path.endswith(".oasis.specific.yml")) and entry_id > 0:
             self.entry_id = entry_id
-            self.file_name = file_name
-            with open(self.file_name, "r", encoding="utf-8") as stream:
+            self.file_path = file_path
+            with open(self.file_path, "r", encoding="utf-8") as stream:
                 self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/")
         else:
             self.entry_id = 1
-            self.file_name = ""
+            self.file_path = ""
             self.yml = {}

     def report(self, template: dict) -> dict:
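Both this parser and the ELN parser below load their YAML input into a flatdict, so nested keys become slash-delimited paths; a minimal, self-contained illustration (the YAML snippet is made up):

import flatdict as fd
import yaml

# made-up YAML snippet standing in for an *.oasis.specific.yaml file
yml = fd.FlatDict(yaml.safe_load("citation:\n  doi: https://doi.org/10.0000/example"), delimiter="/")
print(yml["citation/doi"])  # nested keys are addressed as one slash-delimited path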
18 changes: 7 additions & 11 deletions pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py
@@ -20,17 +20,13 @@
 # pylint: disable=no-member,duplicate-code,too-many-nested-blocks

 import flatdict as fd
-
 import yaml
-
 from ase.data import chemical_symbols

 from pynxtools.dataconverter.readers.apm.map_concepts.apm_eln_to_nx_map \
     import NxApmElnInput, NxUserFromListOfDict
-
 from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \
     import variadic_path_to_specific_path, apply_modifier
-
 from pynxtools.dataconverter.readers.apm.utils.apm_parse_composition_table \
     import parse_composition_table

@@ -53,17 +49,17 @@ class NxApmNomadOasisElnSchemaParser:  # pylint: disable=too-few-public-methods
     during the verification of the template dictionary.
     """

-    def __init__(self, file_name: str, entry_id: int):
-        print(f"Extracting data from ELN file: {file_name}")
-        if (file_name.rsplit('/', 1)[-1].startswith("eln_data")
-                or file_name.startswith("eln_data")) and entry_id > 0:
+    def __init__(self, file_path: str, entry_id: int):
+        print(f"Extracting data from ELN file: {file_path}")
+        if (file_path.rsplit('/', 1)[-1].startswith("eln_data")
+                or file_path.startswith("eln_data")) and entry_id > 0:
             self.entry_id = entry_id
-            self.file_name = file_name
-            with open(self.file_name, "r", encoding="utf-8") as stream:
+            self.file_path = file_path
+            with open(self.file_path, "r", encoding="utf-8") as stream:
                 self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/")
         else:
             self.entry_id = 1
-            self.file_name = ""
+            self.file_path = ""
             self.yml = {}

     def parse_sample_composition(self, template: dict) -> dict:
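The gate above only accepts files whose base name starts with eln_data; a quick check of that logic with hypothetical paths:

# hypothetical paths; mirrors the parser's file-name gating
for fpath in ("eln_data.yaml", "/tmp/upload/eln_data_apm.yaml", "notes.yaml"):
    accepted = fpath.rsplit('/', 1)[-1].startswith("eln_data")
    print(f"{fpath} -> {accepted}")  # True, True, False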