From f15e1cfbff02b563f1c1827e162d641e3cd73708 Mon Sep 17 00:00:00 2001 From: mkuehbach Date: Tue, 2 Jan 2024 16:51:21 +0100 Subject: [PATCH] Fixed steps i), ii), all 176 examples processed successfully, PSE coverage 58 elements, 5.9 billion ions in total, 77 projects from research of all major groups worldwide, next steps: iii) move to ifes, iv) nxdef refactor, v) eln, cfg parsing, RUN ALL and check proper annotation, styling, linting ruff --- pynxtools/dataconverter/readers/apm/reader.py | 2 ++ .../readers/apm/utils/apm_define_io_cases.py | 27 ++++++++++++------- .../readers/apm/utils/apm_load_ranging.py | 22 +++++++-------- .../apm/utils/apm_load_reconstruction.py | 2 +- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/pynxtools/dataconverter/readers/apm/reader.py b/pynxtools/dataconverter/readers/apm/reader.py index 44033615b..2b72228f0 100644 --- a/pynxtools/dataconverter/readers/apm/reader.py +++ b/pynxtools/dataconverter/readers/apm/reader.py @@ -119,8 +119,10 @@ def read(self, print("No input-file defined for ranging definitions!") return {} + """ print("Create NeXus default plottable data...") apm_default_plot_generator(template, n_entries) + """ # print("Reporting state of template before passing to HDF5 writing...") # for keyword in template.keys(): diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py b/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py index 6fd7986e7..36c494718 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py +++ b/pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py @@ -59,11 +59,14 @@ def sort_files_by_file_name_suffix(self, file_paths: Tuple[str] = None): if suffix not in [".h5", "range_.h5"]: if (fpath.lower().endswith(suffix)) and (fpath not in self.case[suffix]): self.case[suffix].append(fpath) + break else: - if fpath.lower().endswith("range_.h5") is True: + if fpath.lower().endswith("range_.h5") == True:
self.case["range_.h5"].append(fpath) - elif fpath.lower().endswith(".h5") is True: + break + elif fpath.lower().endswith(".h5") == True: self.case[".h5"].append(fpath) + break else: continue # HDF5 files need special treatment, this already shows that magic numbers @@ -76,14 +79,20 @@ def check_validity_of_file_combinations(self): range_input = 0 # ranging definition file, e.g. RNG, RRNG, ENV, FIG.TXT other_input = 0 # generic ELN or OASIS-specific configurations for suffix, value in self.case.items(): - if suffix in VALID_FILE_NAME_SUFFIX_RECON: - recon_input += len(value) - elif suffix in VALID_FILE_NAME_SUFFIX_RANGE: - range_input += len(value) - elif suffix in VALID_FILE_NAME_SUFFIX_CONFIG: - other_input += len(value) + if suffix not in [".h5", "range_.h5"]: + if suffix in VALID_FILE_NAME_SUFFIX_RECON: + recon_input += len(value) + elif suffix in VALID_FILE_NAME_SUFFIX_RANGE: + range_input += len(value) + elif suffix in VALID_FILE_NAME_SUFFIX_CONFIG: + other_input += len(value) + else: + continue else: - continue + if suffix == "range_.h5": + range_input += len(value) + if suffix == ".h5": + recon_input += len(value) if (recon_input == 1) and (range_input == 1): # and (1 <= other_input <= 2): self.is_valid = True diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py b/pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py index af38ff5e9..4681d2eaf 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py +++ b/pynxtools/dataconverter/readers/apm/utils/apm_load_ranging.py @@ -39,6 +39,11 @@ from pynxtools.dataconverter.readers.apm.utils.apm_define_io_cases \ import VALID_FILE_NAME_SUFFIX_RANGE +WARNING_TOO_MANY_DEFINITIONS \ + = f"WARNING::Range file contains more than {MAX_NUMBER_OF_ION_SPECIES} " \ + f"WARNING::definitions! This is often a signature of duplicates or " \ + f"WARNING::contradicting definitions." 
+ def add_unknown_iontype(template: dict, entry_id: int) -> dict: """Add default unknown iontype.""" @@ -139,8 +144,7 @@ def extract_data_from_env_file(file_path: str, template: dict, entry_id: int) -> print(f"Extracting data from ENV file: {file_path}") rangefile = ReadEnvFileFormat(file_path) if len(rangefile.env["molecular_ions"]) > np.iinfo(np.uint8).max + 1: - raise ValueError(f"Current implementation does not support " - f"more than {MAX_NUMBER_OF_ION_SPECIES} ion types") + print(WARNING_TOO_MANY_DEFINITIONS) add_standardize_molecular_ions( rangefile.env["molecular_ions"], template, entry_id) @@ -152,8 +156,7 @@ def extract_data_from_fig_txt_file(file_path: str, template: dict, entry_id: int print(f"Extracting data from FIG.TXT file: {file_path}") rangefile = ReadFigTxtFileFormat(file_path) if len(rangefile.fig["molecular_ions"]) > np.iinfo(np.uint8).max + 1: - raise ValueError(f"Current implementation does not support " - f"more than {MAX_NUMBER_OF_ION_SPECIES} ion types") + print(WARNING_TOO_MANY_DEFINITIONS) add_standardize_molecular_ions( rangefile.fig["molecular_ions"], template, entry_id) @@ -165,8 +168,7 @@ def extract_data_from_pyccapt_file(file_path: str, template: dict, entry_id: int print(f"Extracting data from pyccapt/ranging HDF5 file: {file_path}") rangefile = ReadPyccaptRangingFileFormat(file_path) if len(rangefile.rng["molecular_ions"]) > np.iinfo(np.uint8).max + 1: - raise ValueError(f"Current implementation does not support " - f"more than {MAX_NUMBER_OF_ION_SPECIES} ion types") + print(WARNING_TOO_MANY_DEFINITIONS) add_standardize_molecular_ions( rangefile.rng["molecular_ions"], template, entry_id) @@ -178,8 +180,7 @@ def extract_data_from_rng_file(file_path: str, template: dict, entry_id: int) -> print(f"Extracting data from RNG file: {file_path}") rangefile = ReadRngFileFormat(file_path) if len(rangefile.rng["molecular_ions"]) > np.iinfo(np.uint8).max + 1: - raise ValueError(f"Current implementation does not support " - f"more than 
{MAX_NUMBER_OF_ION_SPECIES} ion types") + print(WARNING_TOO_MANY_DEFINITIONS) add_standardize_molecular_ions( rangefile.rng["molecular_ions"], template, entry_id) @@ -191,8 +192,7 @@ def extract_data_from_rrng_file(file_path: str, template: dict, entry_id) -> dic print(f"Extracting data from RRNG file: {file_path}") rangefile = ReadRrngFileFormat(file_path) if len(rangefile.rrng["molecular_ions"]) > np.iinfo(np.uint8).max + 1: - raise ValueError(f"Current implementation does not support more " - f"than {MAX_NUMBER_OF_ION_SPECIES} ion types") + print(WARNING_TOO_MANY_DEFINITIONS) add_standardize_molecular_ions( rangefile.rrng["molecular_ions"], template, entry_id) @@ -207,7 +207,7 @@ def __init__(self, file_path: str, entry_id: int): "file_path": file_path, "entry_id": entry_id} for suffix in VALID_FILE_NAME_SUFFIX_RANGE: - if file_path.lower().endswith(suffix) is True: + if file_path.lower().endswith(suffix) == True: self.meta["file_format"] = suffix break if self.meta["file_format"] is None: diff --git a/pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py b/pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py index 43b75dec2..319045102 100644 --- a/pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py +++ b/pynxtools/dataconverter/readers/apm/utils/apm_load_reconstruction.py @@ -224,7 +224,7 @@ def __init__(self, file_path: str, entry_id: int): "file_path": file_path, "entry_id": entry_id} for suffix in VALID_FILE_NAME_SUFFIX_RECON: - if file_path.lower().endswith(suffix) is True: + if file_path.lower().endswith(suffix) == True: self.meta["file_format"] = suffix break if self.meta["file_format"] is None: