Added support for incorporating the D. R. Diercks et al. APT mass spectrum database, which is currently hosted on an AirTable (but no longer maintained); added ELN and I/O-logic parsing to populate NXserialized artifacts, which can be used to implement a visualization of the information provenance (workflow) for NOMAD OASIS; fix i) except for user
atomprobe-tc committed Jan 10, 2024
1 parent b5ec396 commit 56a42ac
Showing 12 changed files with 171 additions and 76 deletions.
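Each NXserialized provenance artifact that this commit introduces ends up in the NeXus template as four key-value pairs. A sketch of one populated artifact, based on the defaults in report_workflow further below; the path and checksum values are illustrative:

/ENTRY[entry1]/atom_probe/ranging/SERIALIZED[serialized]/type: file
/ENTRY[entry1]/atom_probe/ranging/SERIALIZED[serialized]/path: ErMnO.env
/ENTRY[entry1]/atom_probe/ranging/SERIALIZED[serialized]/checksum: <SHA-256 hex digest of the file content>
/ENTRY[entry1]/atom_probe/ranging/SERIALIZED[serialized]/algorithm: SHA256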
3 changes: 1 addition & 2 deletions debug/batch_queue.04.sh
@@ -1,4 +1,3 @@
#!/bin/bash

dataconverter --reader apm --nxdl NXapm --input-file="/home/kaiobach/Research/paper_paper_paper/joss_nomad_apt/bb_analysis/analysis/harvest_examples/production/ErMnO_pole.pos" --input-file="/home/kaiobach/Research/paper_paper_paper/joss_nomad_apt/bb_analysis/analysis/harvest_examples/production/ErMnO.env" --input-file="apm.oasis.specific.yaml" --output=debug.nor_trondheim_hatzeglou_0.nxs
# 1>stdout.debug.nor_trondheim_hatzeglou_0.txt 2>stderr.debug.nor_trondheim_hatzeglou_0.txt
dataconverter --reader apm --nxdl NXapm --input-file="/home/kaiobach/Research/paper_paper_paper/joss_nomad_apt/bb_analysis/analysis/harvest_examples/production/ErMnO_pole.pos" --input-file="/home/kaiobach/Research/paper_paper_paper/joss_nomad_apt/bb_analysis/analysis/harvest_examples/production/ErMnO.env" --input-file="apm.oasis.specific.yaml" --input-file="eln_data_apm.yaml" --output=debug.nor_trondheim_hatzeglou_0.nxs 1>stdout.debug.nor_trondheim_hatzeglou_0.txt 2>stderr.debug.nor_trondheim_hatzeglou_0.txt
4 changes: 2 additions & 2 deletions debug/eln_data_apm.yaml
@@ -59,8 +59,8 @@ workflow:
raw_dat_file: str.str
hit_dat_file: hits.hits
recon_cfg_file: hits.root
recon_res_file: recon.apt
range_dat_file: recon.rrng
# recon_res_file: recon.apt
# range_dat_file: recon.rrng
ranging:
program: IVAS
program_version: 3.6.8
Empty file added debug/hits.hits
Empty file added debug/root.root
Empty file added debug/str.str
@@ -17,19 +17,14 @@
#
"""Dict mapping custom schema instances from eln_data.yaml file on concepts in NXapm."""

# "/ENTRY[entry*]/PROGRAM[program2]/program": "load_from", "atom_probe/control_software_program"),
# "/ENTRY[entry*]/PROGRAM[program2]/program/@version": "load_from", "atom_probe/control_software_program__attr_version"),
# ("/ENTRY[entry*]/atom_probe/specimen_monitoring/detection_rate", "load_from", "atom_probe/specimen_monitoring_detection_rate/value"),
# "/ENTRY[entry*]/atom_probe/specimen_monitoring/detection_rate/@units", "load_from", "atom_probe/specimen_monitoring_detection_rate/unit"),


APM_EXAMPLE_ENTRIES_TO_NEXUS \
APM_EXAMPLE_OTHER_TO_NEXUS \
= [("/ENTRY[entry*]/run_number", "load_from", "entry/run_number"),
("/ENTRY[entry*]/start_time", "load_from", "entry/start_time"),
("/ENTRY[entry*]/end_time", "load_from", "entry/end_time"),
("/ENTRY[entry*]/operation_mode", "load_from", "entry/operation_mode"),
("/ENTRY[entry*]/experiment_description", "load_from", "entry/experiment_description"),
("/ENTRY[entry*]/sample/method", "load_from", "sample/method"),
("/ENTRY[entry*]/sample/method", "experiment"),
("/ENTRY[entry*]/sample/alias", "load_from", "sample/alias"),
("/ENTRY[entry*]/sample/grain_diameter", "load_from", "sample/grain_diameter/value"),
("/ENTRY[entry*]/sample/grain_diameter/@units", "load_from", "sample/grain_diameter/unit"),
@@ -44,7 +39,7 @@
("/ENTRY[entry*]/sample/heat_treatment_quenching_rate_error", "load_from", "sample/heat_treatment_quenching_rate_error/value"),
("/ENTRY[entry*]/sample/heat_treatment_quenching_rate_error/@units", "load_from", "sample/heat_treatment_quenching_rate_error/unit"),
("/ENTRY[entry*]/sample/description", "load_from", "sample/description"),
("/ENTRY[entry*]/specimen/method", "load_from", "specimen/method"),
("/ENTRY[entry*]/specimen/method", "experiment"),
("/ENTRY[entry*]/specimen/alias", "load_from", "specimen/alias"),
("/ENTRY[entry*]/specimen/preparation_date", "load_from", "specimen/preparation_date"),
("/ENTRY[entry*]/specimen/description", "load_from", "specimen/description"),
@@ -75,16 +70,27 @@
("/ENTRY[entry*]/measurement/event_data_apm_set/EVENT_DATA_APM[event_data_apm]/instrument/analysis_chamber/chamber_pressure/@units", "load_from", "atom_probe/chamber_pressure/unit"),
("/ENTRY[entry*]/measurement/event_data_apm_set/EVENT_DATA_APM[event_data_apm]/instrument/stage_lab/base_temperature", "load_from", "atom_probe/base_temperature/value"),
("/ENTRY[entry*]/measurement/event_data_apm_set/EVENT_DATA_APM[event_data_apm]/instrument/stage_lab/base_temperature/@units", "load_from", "atom_probe/base_temperature/unit"),
("/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program", "load_from", "atom_probe/ranging/program"),
("/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program/@version", "load_from", "atom_probe/ranging/program_version"),
("/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program", "load_from", "atom_probe/reconstruction/program"),
("/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program/@version", "load_from", "atom_probe/reconstruction/program_version"),
("/ENTRY[entry*]/atom_probe/reconstruction/protocol_name", "load_from", "atom_probe/reconstruction/protocol_name"),
("/ENTRY[entry*]/atom_probe/reconstruction/crystallographic_calibration", "load_from", "atom_probe/reconstruction/crystallographic_calibration"),
("/ENTRY[entry*]/atom_probe/reconstruction/parameter", "load_from", "atom_probe/reconstruction/parameter"),
("/ENTRY[entry*]/atom_probe/reconstruction/field_of_view", "load_from", "atom_probe/reconstruction/field_of_view/value"),
("/ENTRY[entry*]/atom_probe/reconstruction/field_of_view/@units", "load_from", "atom_probe/reconstruction/field_of_view/unit"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/type", "file"),
("/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program", "load_from", "ranging/program"),
("/ENTRY[entry*]/atom_probe/ranging/PROGRAM[program1]/program/@version", "load_from", "ranging/program_version"),
("/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program", "load_from", "reconstruction/program"),
("/ENTRY[entry*]/atom_probe/reconstruction/PROGRAM[program1]/program/@version", "load_from", "reconstruction/program_version"),
("/ENTRY[entry*]/atom_probe/reconstruction/protocol_name", "load_from", "reconstruction/protocol_name"),
("/ENTRY[entry*]/atom_probe/reconstruction/crystallographic_calibration", "load_from", "reconstruction/crystallographic_calibration"),
("/ENTRY[entry*]/atom_probe/reconstruction/parameter", "load_from", "reconstruction/parameter"),
("/ENTRY[entry*]/atom_probe/reconstruction/field_of_view", "load_from", "reconstruction/field_of_view/value"),
("/ENTRY[entry*]/atom_probe/reconstruction/field_of_view/@units", "load_from", "reconstruction/field_of_view/unit"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/path", "load_from", "workflow/raw_dat_file"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/checksum", "sha256", "workflow/raw_dat_file"),
("/ENTRY[entry*]/atom_probe/hit_finding/SERIALIZED[serialized]/path", "load_from", "workflow/hit_dat_file"),
("/ENTRY[entry*]/atom_probe/hit_finding/SERIALIZED[serialized]/checksum", "sha256", "workflow/hit_dat_file"),
("/ENTRY[entry*]/atom_probe/reconstruction/config/path", "load_from", "workflow/recon_cfg_file"),
("/ENTRY[entry*]/atom_probe/reconstruction/config/checksum", "sha256", "workflow/recon_cfg_file")]

"""
# depending on the case some of these are populated by the I/O logic resolver
# some of them will be loaded by the ELN parser
APM_EXAMPLE_WORKFLOW_TO_NEXUS \
= [("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/type", "file"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/path", "load_from", "workflow/raw_dat_file"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/checksum", "sha256", "workflow/raw_dat_file"),
("/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]/algorithm", "SHA256"),
@@ -104,11 +110,13 @@
("/ENTRY[entry*]/atom_probe/ranging/SERIALIZED[serialized]/path", "load_from", "workflow/range_dat_file"),
("/ENTRY[entry*]/atom_probe/ranging/SERIALIZED[serialized]/checksum", "sha256", "workflow/range_dat_file"),
("/ENTRY[entry*]/atom_probe/ranging/SERIALIZED[serialized]/algorithm", "SHA256")]
"""

# NeXus-concept-specific mapping tables which require special treatment because the
# current NOMAD OASIS custom schema implementation delivers them as a list of
# dictionaries instead of a directly flattenable list of key-value pairs


APM_EXAMPLE_USER_TO_NEXUS \
= [("/ENTRY[entry*]/USER[user*]/name", "load_from", "name"),
("/ENTRY[entry*]/USER[user*]/affiliation", "load_from", "affiliation"),
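As a sketch of how mapping tables like APM_EXAMPLE_OTHER_TO_NEXUS above could be consumed: two-tuples carry a constant value to write directly, while three-tuples with a "load_from" instruction copy a value from the flattened ELN dictionary. The helper below is illustrative only and not the module's actual resolver; the real code uses variadic_path_to_specific_path and also handles the "sha256" instruction:

# illustrative consumer of (target, instruction, source) mapping tuples;
# flat_eln is a flattened ELN dict such as {"sample/alias": "A1", ...}
def apply_mapping(table, flat_eln: dict, template: dict, entry_id: int = 1) -> dict:
    for tpl in table:
        # resolve the variadic path, e.g. "/ENTRY[entry*]/..." -> "/ENTRY[entry1]/..."
        trg = tpl[0].replace("entry*", f"entry{entry_id}")
        if len(tpl) == 2:
            template[trg] = tpl[1]  # constant value, written as-is
        elif tpl[1] == "load_from" and tpl[2] in flat_eln:
            template[trg] = flat_eln[tpl[2]]  # value copied from the ELN
    return template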
11 changes: 5 additions & 6 deletions pynxtools/dataconverter/readers/apm/reader.py
@@ -34,8 +34,8 @@
import ApmRangingDefinitionsParser
from pynxtools.dataconverter.readers.apm.utils.apm_create_nx_default_plots \
import apm_default_plot_generator
from pynxtools.dataconverter.readers.apm.utils.apm_generate_synthetic_data \
import ApmCreateExampleData
# from pynxtools.dataconverter.readers.apm.utils.apm_generate_synthetic_data \
# import ApmCreateExampleData

# this apm parser combines multiple sub-parsers
# so we need the following input:
@@ -95,16 +95,15 @@ def read(self,
if case.is_valid == False:
print("Such a combination of input-file(s, if any) is not supported !")
return {}
case.report_workflow(template, entry_id)

"""
print("Parse (meta)data coming from an ELN...")
if len(case.eln) == 1:
nx_apm_eln = NxApmNomadOasisElnSchemaParser(case.eln[0], entry_id)
nx_apm_eln.report(template)
else:
print("No input file defined for eln data !")
return {}
"""

print("Parse (meta)data coming from a configuration that specific OASIS...")
if len(case.cfg) == 1:
@@ -131,8 +130,8 @@ def read(self,

# print("Reporting state of template before passing to HDF5 writing...")
# for keyword in template.keys():
# print(keyword)
# print(template[keyword])
# print(f"keyword: {keyword}, template[keyword]: {template[keyword]}")
# exit(1)

print("Forward instantiated template to the NXS writer...")
return template
56 changes: 53 additions & 3 deletions pynxtools/dataconverter/readers/apm/utils/apm_define_io_cases.py
@@ -15,17 +15,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Utility class to analyze which vendor/community files are passed to em reader."""
"""Utility class to analyze which vendor/community files are passed to apm reader."""

# pylint: disable=no-member,duplicate-code

from typing import Tuple, Dict, List

from pynxtools.dataconverter.readers.shared.map_concepts.mapping_functors \
import variadic_path_to_specific_path
from pynxtools.dataconverter.readers.shared.shared_utils import get_sha256_of_file_content

VALID_FILE_NAME_SUFFIX_RECON = [".apt", ".pos", ".epos", ".ato", ".csv", ".h5"]
VALID_FILE_NAME_SUFFIX_RANGE = [".rng", ".rrng", ".env", ".fig.txt", "range_.h5"]
VALID_FILE_NAME_SUFFIX_CONFIG = [".yaml", ".yml"]


class ApmUseCaseSelector: # pylint: disable=too-few-public-methods
class ApmUseCaseSelector:
"""Decision maker about what needs to be parsed given arbitrary input.
Users might invoke this dataconverter with arbitrary input, no input, or
@@ -93,7 +98,7 @@ def check_validity_of_file_combinations(self):
range_input += len(value)
if suffix == ".h5":
recon_input += len(value)
print(f"{recon_input}, {range_input}, {other_input}")
# print(f"{recon_input}, {range_input}, {other_input}")

if (recon_input == 1) and (range_input == 1) and (1 <= other_input <= 2):
self.is_valid = True
@@ -112,3 +117,48 @@
self.cfg += [entry]
else:
self.eln += [entry]
print(f"recon_results: {self.reconstruction}\n" \
f"range_results: {self.ranging}\n" \
f"OASIS ELN: {self.eln}\n" \
f"OASIS local config: {self.cfg}\n")

def report_workflow(self, template: dict, entry_id: int) -> dict:
"""Initialize the reporting of the workflow."""
steps = ["/ENTRY[entry*]/atom_probe/raw_data/SERIALIZED[serialized]",
"/ENTRY[entry*]/atom_probe/hit_finding/SERIALIZED[serialized]",
"/ENTRY[entry*]/atom_probe/reconstruction/config",
"/ENTRY[entry*]/atom_probe/reconstruction/results",
"/ENTRY[entry*]/atom_probe/ranging/SERIALIZED[serialized]"]
defaults = [("type", "file"),
("path", ""),
("checksum", ""),
("algorithm", "SHA256")]
identifier = [entry_id]
# populate the workflow first with default steps to communicate in the NeXus file
# which of the usually recommended files have not been provided for an NXentry;
# keep in mind that already in 2013 D. Larson et al. documented clearly in their
# book which files one should ideally retain to make an analysis repeatable up
# to the reconstruction and ranging steps when using IVAS/APSuite
for step in steps:
trg = variadic_path_to_specific_path(step, identifier)
for dflt in defaults:
template[f"{trg}/{dflt[0]}"] = f"{dflt[1]}"
# populate automatically input-files used
# rely on assumption made in check_validity_of_file_combination
for fpath in self.reconstruction:
prfx = variadic_path_to_specific_path(
"/ENTRY[entry*]/atom_probe/reconstruction/results", identifier)
with open(fpath, "rb") as fp:
template[f"{prfx}/path"] = f"{fpath}"
template[f"{prfx}/checksum"] = get_sha256_of_file_content(fp)
for fpath in self.ranging:
prfx = variadic_path_to_specific_path(
"/ENTRY[entry*]/atom_probe/ranging/SERIALIZED[serialized]", identifier)
with open(fpath, "rb") as fp:
template[f"{prfx}/path"] = f"{fpath}"
template[f"{prfx}/checksum"] = get_sha256_of_file_content(fp)
# FAU/Erlangen's pyccapt control and calibration files have no functional
# distinction, which makes it non-trivial to decide whether a given HDF5 file
# qualifies as a control or a calibration file; TODO: for this reason it is currently ignored
return template
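The get_sha256_of_file_content helper imported from shared_utils is used above with an already opened binary file handle. A minimal hashlib-based sketch with the same call signature; the actual implementation in shared_utils may differ:

import hashlib

def get_sha256_of_file_content(fp) -> str:
    """Return the SHA-256 hex digest of an opened binary file's content."""
    hsh = hashlib.sha256()
    for chunk in iter(lambda: fp.read(65536), b""):  # hash in 64 KiB chunks
        hsh.update(chunk)
    return hsh.hexdigest()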
@@ -31,15 +31,17 @@
class NxApmNomadOasisConfigurationParser: # pylint: disable=too-few-public-methods
"""Parse deployment specific configuration."""

def __init__(self, file_path: str, entry_id: int):
def __init__(self, file_path: str, entry_id: int, verbose: bool = False):
print(f"Extracting data from deployment specific configuration file: {file_path}")
if (file_path.rsplit('/', 1)[-1].endswith(".oasis.specific.yaml")
or file_path.endswith(".oasis.specific.yml")) and entry_id > 0:
self.entry_id = entry_id
self.file_path = file_path
with open(self.file_path, "r", encoding="utf-8") as stream:
self.yml = fd.FlatDict(yaml.safe_load(stream), delimiter="/")
print(self.yml)
if verbose == True:
for key, val in self.yml.items():
print(f"key: {key}, val: {val}")
else:
self.entry_id = 1
self.file_path = ""
@@ -49,11 +51,10 @@ def report(self, template: dict) -> dict:
"""Copy data from configuration applying mapping functors."""
for tpl in APM_OASIS_TO_NEXUS_CFG:
identifier = [self.entry_id]
if isinstance(tpl, tuple):
if isinstance(tpl, tuple) and len(tpl) >= 2:
if tpl[0] not in ("IGNORE", "UNCLEAR"):
trg = variadic_path_to_specific_path(tpl[0], identifier)
print(f"processing tpl {tpl} ... trg {trg}")
# print(f"Target {trg} after variadic name resolution identifier {identifier}")
# print(f"processing tpl {tpl} ... trg {trg}")
if len(tpl) == 2:
# nxpath, value to use directly
template[trg] = tpl[1]
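For context, fd.FlatDict flattens the nested YAML so that the mapping tables can address every value through a single delimiter-joined key. A minimal sketch, assuming the flatdict and PyYAML packages:

import flatdict as fd
import yaml

nested = yaml.safe_load("ranging:\n  program: IVAS\n  program_version: 3.6.8\n")
flat = fd.FlatDict(nested, delimiter="/")
print(flat["ranging/program"])          # -> IVAS
print(flat["ranging/program_version"])  # -> 3.6.8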
