Start refactoring the apm reader; add support for ATO, ENV, FAU/Erlangen's pyccapt calibration and ranging, and FAU/Erlangen's Matlab Atom Probe Toolbox fig ranging definitions

ELN and configuration parsing, including the initial io_case check of the apm reader, are currently deactivated: these functionalities need the updated and refactored ifes_apt_tc_data_modeling library, which as of 2023/12/31 has not yet been released on PyPI (it is, however, committed into its repository). This commit therefore works with a local Python 3.11.5 environment whose ifes_apt_tc_data_modeling site-package was modified manually, to avoid frequently publishing dysfunctional ifes versions before testing them here first.

Next steps:
i) run the reader against all 150+ test cases,
ii) fix the bugs these examples reveal,
iii) move the changes into the ifes library and publish it on PyPI,
iv) update pyproject.toml,
v) reactivate ELN and config parsing,
vi) update the apm definitions to use the refactored apm base classes,
vii) integrate the ruff changes,
viii) commit on pynxtools master
atomprobe-tc committed Dec 31, 2023
1 parent 5ea8824 commit 3a0cb9d
Showing 7 changed files with 266 additions and 183 deletions.
13 changes: 3 additions & 10 deletions pynxtools/dataconverter/readers/apm/reader.py
@@ -17,30 +17,23 @@
 #
 """Generic parser for loading atom probe microscopy data into NXapm."""

-# pylint: disable=no-member
+# pylint: disable=no-member,too-few-public-methods

 from typing import Tuple, Any

 from pynxtools.dataconverter.readers.base.reader import BaseReader
-
 from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases \
     import ApmUseCaseSelector
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_deployment_specifics \
     import NxApmNomadOasisConfigurationParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_generic_eln \
     import NxApmNomadOasisElnSchemaParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_reconstruction \
     import ApmReconstructionParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_load_ranging \
     import ApmRangingDefinitionsParser
-
 from pynxtools.dataconverter.readers.apm.utils.apm_create_nx_default_plots \
     import apm_default_plot_generator
-
 from pynxtools.dataconverter.readers.apm.utils.apm_generate_synthetic_data \
     import ApmCreateExampleData

@@ -69,8 +62,6 @@ class ApmReader(BaseReader):
"""

# pylint: disable=too-few-public-methods

# Whitelist for the NXDLs that the reader supports and can process
supported_nxdls = ["NXapm"]

@@ -98,6 +89,7 @@ def read(self,
         assert case.is_valid is True, \
             "Such a combination of input-file(s, if any) is not supported !"

+        """
         print("Parse (meta)data coming from an ELN...")
         if len(case.eln) == 1:
             nx_apm_eln = NxApmNomadOasisElnSchemaParser(case.eln[0], entry_id)
@@ -111,6 +103,7 @@
             nx_apm_cfg = NxApmNomadOasisConfigurationParser(case.cfg[0], entry_id)
             nx_apm_cfg.report(template)
             # having and/or using a deployment-specific configuration is optional
+        """

         print("Parse (numerical) data and metadata from ranging definitions file...")
         if len(case.reconstruction) == 1:
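As a usage sketch (not part of the commit; the file names are hypothetical), the combination gate that read() asserts on can be exercised directly through the refactored selector from the next file:

from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases import ApmUseCaseSelector

# hypothetical inputs: exactly one reconstruction (.apt) and one ranging (.rrng) file,
# the only combination currently accepted while ELN/config input is deactivated
selector = ApmUseCaseSelector(("Si.apt", "Si.rrng"))
assert selector.is_valid
print(selector.reconstruction)  # ['Si.apt']
print(selector.ranging)  # ['Si.rrng']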
67 changes: 39 additions & 28 deletions pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py
@@ -20,6 +20,9 @@
 # pylint: disable=no-member,duplicate-code

 from typing import Tuple, Dict, List
+VALID_FILE_NAME_SUFFIX_RECON = [".apt", ".pos", ".epos", ".ato", ".csv", ".h5"]
+VALID_FILE_NAME_SUFFIX_RANGE = [".rng", ".rrng", ".env", ".fig.txt", "range_.h5"]
+VALID_FILE_NAME_SUFFIX_CONFIG = [".yaml", ".yml"]


class ApmUseCaseSelector: # pylint: disable=too-few-public-methods
@@ -41,50 +44,58 @@ def __init__(self, file_paths: Tuple[str] = None):
         self.reconstruction: List[str] = []
         self.ranging: List[str] = []
         self.is_valid = False
-        self.supported_mime_types = [
-            "pos", "epos", "apt", "rrng", "rng", "txt", "yaml", "yml"]
-        for mime_type in self.supported_mime_types:
-            self.case[mime_type] = []
-
-        self.sort_files_by_mime_type(file_paths)
+        self.supported_file_name_suffixes = VALID_FILE_NAME_SUFFIX_RECON \
+            + VALID_FILE_NAME_SUFFIX_RANGE + VALID_FILE_NAME_SUFFIX_CONFIG
+        print(f"self.supported_file_name_suffixes: {self.supported_file_name_suffixes}")
+        self.sort_files_by_file_name_suffix(file_paths)
         self.check_validity_of_file_combinations()

-    def sort_files_by_mime_type(self, file_paths: Tuple[str] = None):
-        """Sort all input-files based on their mimetype to prepare validity check."""
-        for file_name in file_paths:
-            index = file_name.lower().rfind(".")
-            if index >= 0:
-                suffix = file_name.lower()[index + 1::]
-                if suffix in self.supported_mime_types:
-                    if file_name not in self.case[suffix]:
-                        self.case[suffix].append(file_name)
+    def sort_files_by_file_name_suffix(self, file_paths: Tuple[str] = None):
+        """Sort all input-files based on their name suffix to prepare validity check."""
+        for suffix in self.supported_file_name_suffixes:
+            self.case[suffix] = []
+        for fpath in file_paths:
+            for suffix in self.supported_file_name_suffixes:
+                if suffix not in [".h5", "range_.h5"]:
+                    if (fpath.lower().endswith(suffix)) and (fpath not in self.case[suffix]):
+                        self.case[suffix].append(fpath)
+                else:
+                    if fpath.lower().endswith("range_.h5") is True:
+                        self.case["range_.h5"].append(fpath)
+                    elif fpath.lower().endswith(".h5") is True:
+                        self.case[".h5"].append(fpath)
+                    else:
+                        continue
+        # HDF5 files need special treatment; this already shows that magic numbers
+        # or signatures should better have been used, to avoid content checks as
+        # complicated as those we had to implement e.g. for the em reader

     def check_validity_of_file_combinations(self):
         """Check if this combination of types of files is supported."""
-        recon_input = 0  # reconstruction relevant file e.g. POS, ePOS, APT
-        range_input = 0  # ranging definition file, e.g. RNG, RRNG
+        recon_input = 0  # reconstruction relevant file e.g. POS, ePOS, APT, ATO, CSV
+        range_input = 0  # ranging definition file, e.g. RNG, RRNG, ENV, FIG.TXT
         other_input = 0  # generic ELN or OASIS-specific configurations
-        for mime_type, value in self.case.items():
-            if mime_type in ["pos", "epos", "apt"]:
+        for suffix, value in self.case.items():
+            if suffix in VALID_FILE_NAME_SUFFIX_RECON:
                 recon_input += len(value)
-            elif mime_type in ["rrng", "rng", "txt"]:
+            elif suffix in VALID_FILE_NAME_SUFFIX_RANGE:
                 range_input += len(value)
-            elif mime_type in ["yaml", "yml"]:
+            elif suffix in VALID_FILE_NAME_SUFFIX_CONFIG:
                 other_input += len(value)
             else:
                 continue

-        if (recon_input == 1) and (range_input == 1) and (1 <= other_input <= 2):
+        if (recon_input == 1) and (range_input == 1):  # and (1 <= other_input <= 2):
             self.is_valid = True
             self.reconstruction: List[str] = []
             self.ranging: List[str] = []
-            for mime_type in ["pos", "epos", "apt"]:
-                self.reconstruction += self.case[mime_type]
-            for mime_type in ["rrng", "rng", "txt"]:
-                self.ranging += self.case[mime_type]
+            for suffix in VALID_FILE_NAME_SUFFIX_RECON:
+                self.reconstruction += self.case[suffix]
+            for suffix in VALID_FILE_NAME_SUFFIX_RANGE:
+                self.ranging += self.case[suffix]
             yml: List[str] = []
-            for mime_type in ["yaml", "yml"]:
-                yml += self.case[mime_type]
+            for suffix in VALID_FILE_NAME_SUFFIX_CONFIG:
+                yml += self.case[suffix]
             for entry in yml:
                 if entry.endswith(".oasis.specific.yaml") \
                         or entry.endswith(".oasis.specific.yml"):
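The trailing comment in sort_files_by_file_name_suffix alludes to magic numbers; for illustration (not part of this commit), HDF5 files can be recognized by content rather than suffix, since an HDF5 file starts with the 8-byte superblock signature \x89HDF\r\n\x1a\n:

def has_hdf5_signature(file_path: str) -> bool:
    # typical HDF5 files carry the superblock signature at offset 0
    # (the format also allows offsets 512, 1024, 2048, ...)
    with open(file_path, "rb") as fp:
        return fp.read(8) == b"\x89HDF\r\n\x1a\n"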
pynxtools/dataconverter/readers/apm/utils/apm_load_deployment_specifics.py
@@ -20,30 +20,28 @@
 # pylint: disable=no-member

 import flatdict as fd
-
 import yaml

 from pynxtools.dataconverter.readers.apm.map_concepts.apm_deployment_specifics_to_nx_map \
     import NxApmDeploymentSpecificInput
-
 from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \
     import apply_modifier, variadic_path_to_specific_path


 class NxApmNomadOasisConfigurationParser:  # pylint: disable=too-few-public-methods
     """Parse deployment specific configuration."""

-    def __init__(self, file_name: str, entry_id: int):
-        print(f"Extracting data from deployment specific configuration file: {file_name}")
-        if (file_name.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml")
-                or file_name.endswith(".oasis.specific.yml")) and entry_id > 0:
+    def __init__(self, file_path: str, entry_id: int):
+        print(f"Extracting data from deployment specific configuration file: {file_path}")
+        if (file_path.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml")
+                or file_path.endswith(".oasis.specific.yml")) and entry_id > 0:
             self.entry_id = entry_id
-            self.file_name = file_name
-            with open(self.file_name, "r", encoding="utf-8") as stream:
+            self.file_path = file_path
+            with open(self.file_path, "r", encoding="utf-8") as stream:
                 self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/")
         else:
             self.entry_id = 1
-            self.file_name = ""
+            self.file_path = ""
             self.yml = {}

     def report(self, template: dict) -> dict:
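Both this parser and the ELN parser below load their YAML input into a flatdict, so nested keys become slash-delimited paths; a minimal, self-contained illustration (the YAML snippet is made up):

import flatdict as fd
import yaml

# made-up YAML snippet standing in for an *.oasis.specific.yaml file
yml = fd.FlatDict(yaml.safe_load("citation:\n  doi: https://doi.org/10.0000/example"), delimiter="/")
print(yml["citation/doi"])  # nested keys are addressed as one slash-delimited path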
18 changes: 7 additions & 11 deletions pynxtools/dataconverter/readers/apm/utils/apm_load_generic_eln.py
@@ -20,17 +20,13 @@
 # pylint: disable=no-member,duplicate-code,too-many-nested-blocks

 import flatdict as fd
-
 import yaml
-
 from ase.data import chemical_symbols

 from pynxtools.dataconverter.readers.apm.map_concepts.apm_eln_to_nx_map \
     import NxApmElnInput, NxUserFromListOfDict
-
 from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \
     import variadic_path_to_specific_path, apply_modifier
-
 from pynxtools.dataconverter.readers.apm.utils.apm_parse_composition_table \
     import parse_composition_table

@@ -53,17 +49,17 @@ class NxApmNomadOasisElnSchemaParser:  # pylint: disable=too-few-public-methods
     during the verification of the template dictionary.
     """

-    def __init__(self, file_name: str, entry_id: int):
-        print(f"Extracting data from ELN file: {file_name}")
-        if (file_name.rsplit('/', 1)[-1].startswith("eln_data")
-                or file_name.startswith("eln_data")) and entry_id > 0:
+    def __init__(self, file_path: str, entry_id: int):
+        print(f"Extracting data from ELN file: {file_path}")
+        if (file_path.rsplit('/', 1)[-1].startswith("eln_data")
+                or file_path.startswith("eln_data")) and entry_id > 0:
             self.entry_id = entry_id
-            self.file_name = file_name
-            with open(self.file_name, "r", encoding="utf-8") as stream:
+            self.file_path = file_path
+            with open(self.file_path, "r", encoding="utf-8") as stream:
                 self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/")
         else:
             self.entry_id = 1
-            self.file_name = ""
+            self.file_path = ""
             self.yml = {}

     def parse_sample_composition(self, template: dict) -> dict:
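The gate above only accepts files whose base name starts with eln_data; a quick check of that logic with hypothetical paths:

# hypothetical paths; mirrors the parser's file-name gating
for fpath in ("eln_data.yaml", "/tmp/upload/eln_data_apm.yaml", "notes.yaml"):
    accepted = fpath.rsplit('/', 1)[-1].startswith("eln_data")
    print(f"{fpath} -> {accepted}")  # True, True, False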