From 752a3fcee53c7638b0a2718baab161bb3a7d35eb Mon Sep 17 00:00:00 2001 From: "markus.kuehbach" Date: Mon, 27 Nov 2023 21:15:26 +0100 Subject: [PATCH 1/5] Release py3.11 constraint, updated readme --- README.md | 9 ++++++--- dev-requirements.txt | 15 +++------------ pyproject.toml | 10 ++++++---- 3 files changed, 15 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 0a5ffa9..f68b3fb 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,13 @@ Foster exchange about data models and work towards specifications of file formats from the research field of atom probe microscopy. # Getting started -You should create a virtual environment. We tested on Ubuntu with Python 3.8. -Newer versions of Python should work similarly when using the desired version tag. +You should create a virtual environment. We tested on Ubuntu with Python 3.8 and newer versions. +In what follows the version (tag) 3.8 is a placeholder whereby we show how to proceed when using +Python 3.8. Using newer versions of Python should work the same by replacing 3.8 with the respective +version (tag). -If you don't have Python 3.8 installed on your computer, follow these commands: +Older versions of Python like 3.8 and 3.9 are available e.g. via the deadsnakes repository or via +conda. 
For using deadsnakes proceed with the following commands: ``` sudo add-apt-repository ppa:deadsnakes/ppa sudo apt install python3.8 python3-dev libpython3.8-dev python3.8-venv diff --git a/dev-requirements.txt b/dev-requirements.txt index 7bf134c..0947a56 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.9 +# This file is autogenerated by pip-compile with Python 3.11 # by the following command: # -# pip-compile --extra=dev --output-file=dev-requirements.txt --resolver=backtracking pyproject.toml +# pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml # anyio==3.6.2 # via jupyter-server @@ -72,13 +72,8 @@ idna==3.4 # requests importlib-metadata==6.6.0 # via - # jupyter-client - # jupyterlab-server # keyring - # nbconvert # twine -importlib-resources==5.12.0 - # via matplotlib ipykernel==6.22.0 # via # nbclassic @@ -355,8 +350,6 @@ traitlets==5.9.0 # notebook twine==4.0.2 # via ifes-apt-tc-data-modeling (pyproject.toml) -typing-extensions==4.5.0 - # via ipython tzdata==2023.3 # via pandas urllib3==1.26.15 @@ -372,9 +365,7 @@ webencodings==0.5.1 websocket-client==1.5.1 # via jupyter-server zipp==3.15.0 - # via - # importlib-metadata - # importlib-resources + # via importlib-metadata # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/pyproject.toml b/pyproject.toml index c08ac1f..1b00af1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,11 +11,13 @@ authors = [ description = "Foster exchange about data models and work towards clear specifications of file formats and data models in the research field of atom probe microscopy." 
readme = "README.md" license = { file = "LICENSE.txt" } -requires-python = ">=3.8,<3.11" +requires-python = ">=3.8" classifiers = [ "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", ] @@ -29,9 +31,9 @@ dependencies = [ [project.optional-dependencies] dev = [ - "twine>=4.0.2", - "jupyterlab_h5web>=6.6.1", - "jupyterlab>=3.5.2,<3.6.0" + "twine", + "jupyterlab_h5web", + "jupyterlab" ] # [tool.setuptools] From c3492871193d6f8689f5b19701c388b45680baac Mon Sep 17 00:00:00 2001 From: atomprobe-tc Date: Mon, 27 Nov 2023 23:02:33 +0100 Subject: [PATCH 2/5] Parsing of ato3 and ato5, specifically ato5 needs testing --- ifes_apt_tc_data_modeling/ato/ato_reader.py | 117 ++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 ifes_apt_tc_data_modeling/ato/ato_reader.py diff --git a/ifes_apt_tc_data_modeling/ato/ato_reader.py b/ifes_apt_tc_data_modeling/ato/ato_reader.py new file mode 100644 index 0000000..6e635d2 --- /dev/null +++ b/ifes_apt_tc_data_modeling/ato/ato_reader.py @@ -0,0 +1,117 @@ +# POS file format reader used by atom probe microscopists. +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# pylint: disable=no-member,duplicate-code + +import os + +import numpy as np + +from ifes_apt_tc_data_modeling.nexus.nx_field import NxField + +from ifes_apt_tc_data_modeling.utils.mmapped_io import get_memory_mapped_data + + +class ReadAtoFileFormat(): + """Read Rouen group *.ato file format.""" + + def __init__(self, filename: str): + assert len(filename) > 4, "ATO file incorrect filename ending!" + assert filename.lower().endswith(".ato"), \ + "ATO file incorrect file type!" + self.filename = filename + + self.filesize = os.path.getsize(self.filename) + self.number_of_events = None + self.version = None + retval = self.get_ato_version() + if retval in [3, 4, 5]: + self.version = retval + print(f"ATO file is in a supported version {self.version}") + if self.version == 3: + assert (self.filesize - 2 * 4) % (14 * 4) == 0, \ + "ATO v3 filesize not integer multiple of 14*4B!" + self.number_of_events = np.uint32((self.filesize - 2 * 4) / (14 * 4)) + print(f"ATO file contains {self.number_of_events} entries") + if self.version == 5: + assert (self.filesize - 5000) % 40 == 0, \ + "ATO v5 filesize not integer multiple of 40B!" 
+ self.number_of_events = np.uint32((self.filesize - 5000) / 40) + print(f"ATO file contains {self.number_of_events} entries") + else: + raise ValueError("ATO file unsupported version!") + # https://zenodo.org/records/8382828 + # details three versions of the Rouen/GPM ato format v3, v4, v5 + # Cameca/AMETEK's runrootl/FileConvert utility know two ATO flavours: + # CamecaRoot v18.46.533g built Marc, 21, 2022 against ROOT 5.34/36 + # v3 LAWATOP and v5 current GPM + + def get_ato_version(self): + header = get_memory_mapped_data(self.filename, " x + xyz.typed_value[:, 1] = \ + get_memory_mapped_data(self.filename, " y + xyz.typed_value[:, 2] = \ + get_memory_mapped_data(self.filename, " z + if self.version == 5: + xyz.typed_value[:, 0] = \ + np.float32(get_memory_mapped_data(self.filename, " x + xyz.typed_value[:, 1] = \ + np.float32(get_memory_mapped_data(self.filename, " y + xyz.typed_value[:, 2] = \ + get_memory_mapped_data(self.filename, " z + return xyz + + def get_mass_to_charge_state_ratio(self): + """Read mass-to-charge-state-ratio column.""" + + m_n = NxField() + m_n.typed_value = np.zeros( + [self.number_of_events, 1], np.float32) + m_n.unit = "Da" + + if self.version == 3: + m_n.typed_value[:, 0] = \ + get_memory_mapped_data(self.filename, " Date: Mon, 27 Nov 2023 23:13:44 +0100 Subject: [PATCH 3/5] Added warning wrt to pitfalls when parsing ATO v3 as detailed in libatomprobe --- ifes_apt_tc_data_modeling/ato/ato_reader.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ifes_apt_tc_data_modeling/ato/ato_reader.py b/ifes_apt_tc_data_modeling/ato/ato_reader.py index 6e635d2..70e8d43 100644 --- a/ifes_apt_tc_data_modeling/ato/ato_reader.py +++ b/ifes_apt_tc_data_modeling/ato/ato_reader.py @@ -61,6 +61,10 @@ def __init__(self, filename: str): # Cameca/AMETEK's runrootl/FileConvert utility know two ATO flavours: # CamecaRoot v18.46.533g built Marc, 21, 2022 against ROOT 5.34/36 # v3 LAWATOP and v5 current GPM + # specifically an earlier parser + # 
https://hg.sr.ht/~mycae/libatomprobe/browse/src/io/dataFiles.cpp?rev=tip + # mentions that storage format may not be robust enough against overflow and + # suggests that additional polishing of results is needed def get_ato_version(self): header = get_memory_mapped_data(self.filename, " Date: Tue, 28 Nov 2023 11:26:10 +0100 Subject: [PATCH 4/5] Further testing of ATO based on imec, pnnl, rouen examples --- ifes_apt_tc_data_modeling/ato/ato_reader.py | 28 +++---- ifes_apt_tc_data_modeling/rng/rng_reader.py | 3 + ifes_apt_tc_data_modeling/utils/dev/ato/ato.m | 11 +++ tests/TestsForDevelopers.ipynb | 78 ++++++++++++++++++- 4 files changed, 104 insertions(+), 16 deletions(-) create mode 100644 ifes_apt_tc_data_modeling/utils/dev/ato/ato.m diff --git a/ifes_apt_tc_data_modeling/ato/ato_reader.py b/ifes_apt_tc_data_modeling/ato/ato_reader.py index 70e8d43..d9a75e1 100644 --- a/ifes_apt_tc_data_modeling/ato/ato_reader.py +++ b/ifes_apt_tc_data_modeling/ato/ato_reader.py @@ -41,7 +41,8 @@ def __init__(self, filename: str): self.number_of_events = None self.version = None retval = self.get_ato_version() - if retval in [3, 4, 5]: + if retval in [3, 5]: + # there also seems to exist a version 4 but I have never seen an example for it self.version = retval print(f"ATO file is in a supported version {self.version}") if self.version == 3: @@ -81,25 +82,26 @@ def get_reconstructed_positions(self): xyz.unit = "nm" if self.version == 3: - xyz.typed_value[:, 0] = \ - get_memory_mapped_data(self.filename, " x - xyz.typed_value[:, 1] = \ - get_memory_mapped_data(self.filename, " y - xyz.typed_value[:, 2] = \ - get_memory_mapped_data(self.filename, " z + for dim in [0, 1, 2]: + xyz.typed_value[:, dim] = \ + get_memory_mapped_data(self.filename, " x, wpy -> y, fpz -> z if self.version == 5: + # publicly available sources are inconclusive whether coordinates are in angstroem or nm + # based on the evidence of usa_denton_smith Si.epos converted to v5 ATO via CamecaRoot + # the resulting 
x, y coordinates suggests that v5 ATO stores in angstroem, while fpz is stored in nm? + # however https://zenodo.org/records/8382828 reports the reconstructed positions to be named + # not at all wpx, wpy and fpz but x, y, z instead and here claims the nm xyz.typed_value[:, 0] = \ np.float32(get_memory_mapped_data(self.filename, " x + 5000 + 0, 40, self.number_of_events) * 0.1) # wpx -> x xyz.typed_value[:, 1] = \ np.float32(get_memory_mapped_data(self.filename, " y + 5000 + 2, 40, self.number_of_events) * 0.1) # wpy -> y xyz.typed_value[:, 2] = \ get_memory_mapped_data(self.filename, " z + 5000 + 4, 40, self.number_of_events) # fpz -> z return xyz def get_mass_to_charge_state_ratio(self): diff --git a/ifes_apt_tc_data_modeling/rng/rng_reader.py b/ifes_apt_tc_data_modeling/rng/rng_reader.py index 3c21912..323c2cb 100644 --- a/ifes_apt_tc_data_modeling/rng/rng_reader.py +++ b/ifes_apt_tc_data_modeling/rng/rng_reader.py @@ -34,6 +34,9 @@ from ase.data import atomic_numbers, atomic_masses, chemical_symbols +# there are specific examples for unusual range files here: +# https://hg.sr.ht/~mycae/libatomprobe/browse/test/samples/ranges?rev=tip + def evaluate_rng_range_line( i: int, line: str, column_id_to_label: dict, n_columns: int) -> dict: diff --git a/ifes_apt_tc_data_modeling/utils/dev/ato/ato.m b/ifes_apt_tc_data_modeling/utils/dev/ato/ato.m new file mode 100644 index 0000000..cf166c3 --- /dev/null +++ b/ifes_apt_tc_data_modeling/utils/dev/ato/ato.m @@ -0,0 +1,11 @@ +raw = fopen(["/home/kaiobach/Research/paper_paper_paper/joss_nomad_apt/bb_analysis/data/fra_rouen_karam/Experimental Analysis of LaB6 ; Negative Pulse ; DC Voltage 6.6 kV ; Amplitudes 2.5 kV and 3 kV.ato"]); +attoread = inf; %max number of atoms +data.version = fread(raw,2,'uint32','l'); +display(data.version(2)); +header = 5000; +oneat = 320; +fseek(raw,header,'bof'); +wpx = fread(raw,attoread,'int16',oneat/8-2,'b'); +fseek(raw,header+2,'bof'); +wpy = fread(raw,attoread,'int16',oneat/8-2,'b'); 
+fclose(raw); diff --git a/tests/TestsForDevelopers.ipynb b/tests/TestsForDevelopers.ipynb index 074cba2..b7e0e19 100644 --- a/tests/TestsForDevelopers.ipynb +++ b/tests/TestsForDevelopers.ipynb @@ -41,6 +41,80 @@ "from ase.data import atomic_numbers, atomic_masses, chemical_symbols" ] }, + { + "cell_type": "markdown", + "id": "52294143-78c7-47bf-b39e-9e40eec3999d", + "metadata": {}, + "source": [ + "## ATO" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1c42976-c225-40f4-9797-bfcc0d566012", + "metadata": {}, + "outputs": [], + "source": [ + "from ifes_apt_tc_data_modeling.ato.ato_reader import ReadAtoFileFormat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0d1f9d2f-ca12-4d74-9664-08a722d24c46", + "metadata": {}, + "outputs": [], + "source": [ + "prefix = f\"{os.getcwd()}/../../../../../paper_paper_paper/joss_nomad_apt/bb_analysis/data\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "084d2bb0-5a52-49fa-af88-d9851db48521", + "metadata": {}, + "outputs": [], + "source": [ + "directory = f\"{prefix}/usa_cameca_ato_format\"\n", + "fnm = [\"Si.epos.v3.ATO\",\n", + " \"Si.epos.v5.ATO\"]\n", + "# directory = f\"{prefix}/bel_leuven_imec/dataverse_files/correlative-APT-EM\"\n", + "# fnm = [\"finfet/finfet_system.ato\",\n", + "# \"multilayer/multilayer_system.ato\"]\n", + "# directory = f\"{prefix}/fra_rouen_karam\"\n", + "# fnm = [\"Experimental Analysis of LaB6 ; Negative Pulse ; DC Voltage 6.6 kV ; Amplitudes 2.5 kV and 3 kV.ato\",\n", + "# \"Experimental Analysis of LaB6 ; Positive Pulse ; Amplitude 15 kV ; DC Voltage 3.2 kV and 5 kV.ato\",\n", + "# \"Experimental Analysis of LaB6 ; Positive Pulse ; DC Voltage 2.5 kV ; Amplirudes 13.5 kV and 15 kV.ato\",\n", + "# \"Experimental Analysis of LaB6 ; Positive Pulse ; DC Voltage 7 kV ; Amplitudes 6.5 kV and 7.5 kV.ato\"]\n", + "# directory = f\"{prefix}/usa_richland_kruska/kruska/APT\"\n", + "# fnm = [\"R31_11378-v01.ato\",\n", + "# 
\"R31_11381-v02.ato\",\n", + "# \"R31_11553-v01.ato\",\n", + "# \"R31_11554-v01.ato\",\n", + "# \"R31_11556-v01.ato\"]\n", + "print(directory)\n", + "for filename in fnm:\n", + " print(os.path.getsize(f\"{directory}/{filename}\"))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b2a4eb3b-0a08-47a6-913b-cf4178e6bb24", + "metadata": {}, + "outputs": [], + "source": [ + "for filename in fnm:\n", + " print(f\"{directory}/{filename}\")\n", + " ato = ReadAtoFileFormat(f\"{directory}/{filename}\")\n", + " \n", + " xyz = ato.get_reconstructed_positions()\n", + " print(xyz.typed_value)\n", + " m_q = ato.get_mass_to_charge_state_ratio()\n", + " print(m_q.typed_value)" + ] + }, { "cell_type": "markdown", "id": "83369dab-5ce9-4c9e-810e-58e9f8bb93cc", @@ -415,7 +489,6 @@ "cell_type": "markdown", "id": "0ab488b6-ddb3-4507-8f4e-de1c425106d6", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -527,7 +600,6 @@ "cell_type": "markdown", "id": "8783c551-27b2-4902-93c4-2187b1ba0fab", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -703,7 +775,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.16" + "version": "3.11.5" } }, "nbformat": 4, From cb6fe0b6d2596e81969a60ae394298f3e439c852 Mon Sep 17 00:00:00 2001 From: atomprobe-tc Date: Tue, 28 Nov 2023 15:40:32 +0100 Subject: [PATCH 5/5] Added support for pyccapt file formats and tested successfully with Mehrpad's example --- dev-requirements.txt | 18 ++ ifes_apt_tc_data_modeling/ato/ato_reader.py | 1 - .../pyccapt/pyccapt_reader.py | 213 ++++++++++++++++++ pyproject.toml | 3 +- tests/TestsForDevelopers.ipynb | 71 ++++++ 5 files changed, 304 insertions(+), 2 deletions(-) create mode 100644 ifes_apt_tc_data_modeling/pyccapt/pyccapt_reader.py diff --git a/dev-requirements.txt b/dev-requirements.txt index 0947a56..c34a224 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -29,6 +29,8 @@ 
bleach==6.0.0 # via # nbconvert # readme-renderer +blosc2==2.3.1 + # via tables certifi==2022.12.7 # via requests cffi==1.15.1 @@ -167,6 +169,8 @@ more-itertools==9.1.0 # via jaraco-classes mpmath==1.3.0 # via sympy +msgpack==1.0.7 + # via blosc2 nbclassic==0.5.6 # via # jupyterlab @@ -185,6 +189,8 @@ nbformat==5.8.0 # nbclient # nbconvert # notebook +ndindex==1.7 + # via blosc2 nest-asyncio==1.5.6 # via # ipykernel @@ -196,17 +202,22 @@ notebook==6.5.4 # via jupyterlab notebook-shim==0.2.3 # via nbclassic +numexpr==2.8.7 + # via tables numpy==1.24.3 # via # ase + # blosc2 # contourpy # h5grove # h5py # ifes-apt-tc-data-modeling (pyproject.toml) # matplotlib + # numexpr # pandas # radioactivedecay # scipy + # tables # tifffile orjson==3.8.11 # via h5grove @@ -218,6 +229,7 @@ packaging==23.1 # jupyterlab-server # matplotlib # nbconvert + # tables pandas==2.0.1 # via ifes-apt-tc-data-modeling (pyproject.toml) pandocfilters==1.5.0 @@ -249,6 +261,10 @@ ptyprocess==0.7.0 # terminado pure-eval==0.2.2 # via stack-data +py-cpuinfo==9.0.0 + # via + # blosc2 + # tables pycparser==2.21 # via cffi pygments==2.15.1 @@ -314,6 +330,8 @@ stack-data==0.6.2 # via ipython sympy==1.11.1 # via radioactivedecay +tables==3.9.2 + # via ifes-apt-tc-data-modeling (pyproject.toml) terminado==0.17.1 # via # jupyter-server diff --git a/ifes_apt_tc_data_modeling/ato/ato_reader.py b/ifes_apt_tc_data_modeling/ato/ato_reader.py index d9a75e1..a5ebce2 100644 --- a/ifes_apt_tc_data_modeling/ato/ato_reader.py +++ b/ifes_apt_tc_data_modeling/ato/ato_reader.py @@ -24,7 +24,6 @@ import numpy as np from ifes_apt_tc_data_modeling.nexus.nx_field import NxField - from ifes_apt_tc_data_modeling.utils.mmapped_io import get_memory_mapped_data diff --git a/ifes_apt_tc_data_modeling/pyccapt/pyccapt_reader.py b/ifes_apt_tc_data_modeling/pyccapt/pyccapt_reader.py new file mode 100644 index 0000000..5b793d5 --- /dev/null +++ b/ifes_apt_tc_data_modeling/pyccapt/pyccapt_reader.py @@ -0,0 +1,213 @@ +# POS file format 
reader used by atom probe microscopists. +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# pylint: disable=no-member,duplicate-code + +import os + +import h5py + +import numpy as np + +import pandas as pd + +from ase.data import atomic_numbers, chemical_symbols +from ifes_apt_tc_data_modeling.nexus.nx_ion import NxIon +from ifes_apt_tc_data_modeling.nexus.nx_field import NxField +from ifes_apt_tc_data_modeling.utils.utils import \ + isotope_to_hash, isotope_vector_to_nuclid_list, MAX_NUMBER_OF_ATOMS_PER_ION + +# this implementation focuses on the following state of the pyccapt repository +# https://github.com/mmonajem/pyccapt/commit/e955beb4f2627befb8b4d26f2e74e4c52e00394e + +# during the course of an atom probe measurement and analysis with FAU/Erlangen's Oxcart instrument +# several HDF5 files are generated with essentially two software tools. One is pyccapt which has +# a control module, a calibration module (where the voltage/bowl calibration and reconstruction are performed), +# and a module/functionalities to document ranging i.e. 
ion type identification made +# The other software typically used by the FAU/Erlangen atom probe group is Atom Probe Toolbox; +# instructed as a set of Matlab live scripts this toolbox offers data analysis functionalities, +# results are stored via an HDF5 file + +# specific comments +# pyccapt/control +# an HDF5 file keeping relevant quantities + +# pyccapt/calibration +# unfortunately the generated HDF5 file has internally no provenance information +# with which pyccapt version it was generated, therefore developers of pyccapt should +# rather write the content of the HDF5 file explicitly dset by dset e.g. using h5py instead +# of the pandas HDF5 dump convenience functionality +# of course pandas stores its own version but that is not conclusive enough to infer with +# which pyccapt version and most importantly from which other context the file was generated +# this is an aspect of the FAIR RDM principles which the pyccapt approach currently ignores + + +class ReadPyccaptControlFileFormat(): + """Read FAU/Erlangen pyccapt (controle module) HDF5 file format.""" + + def __init__(self, filename: str): + assert len(filename) > 2, "H5 file incorrect filename ending!" + assert filename.lower().endswith(".h5") or filename.lower().endswith(".hdf5"), \ + "HDF5 file incorrect file type!" 
+ self.filename = filename + + self.filesize = os.path.getsize(self.filename) + self.number_of_events = None + self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e" + + # check that the formatting matches that of an pyccapt control module output HDF5 file + with h5py.File(self.filename, "r") as h5r: + self.supported = 0 # voting-based + required_groups = ["apt", "dld", "tdc"] + for req_grpnm in required_groups: + if req_grpnm in h5r.keys(): + self.supported += 1 + if self.supported == 3: + print(f"{self.filename} is a supported pyccapt/control HDF5 file!") + else: + print(f"{self.filename} is not a supported pyccapt/control HDF5 file!") + return + + +class ReadPyccaptCalibrationFileFormat(): + """Read FAU/Erlangen pyccapt (calibration module) HDF5 file format.""" + + def __init__(self, filename: str): + assert len(filename) > 2, "H5 file incorrect filename ending!" + assert filename.lower().endswith(".h5") or filename.lower().endswith(".hdf5"), \ + "HDF5 file incorrect file type!" + self.filename = filename + + self.filesize = os.path.getsize(self.filename) + self.number_of_events = None + self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e" + self.df = None + + with h5py.File(self.filename, "r") as h5r: + self.supported = 0 # voting-based + required_entries = ["df", + "df/axis0", "df/axis1", + "df/block0_items", "df/block0_values", + "df/block1_items", "df/block1_values"] + for entry in required_entries: + if entry in h5r.keys(): + self.supported += 1 + if self.supported == 7: + print(f"{self.filename} is a supported pyccapt/calibration HDF5 file!") + else: + print(f"{self.filename} is not a supported pyccapt/calibration HDF5 file!") + return + + self.df = pd.read_hdf(self.filename) + self.number_of_events = np.shape(self.df)[0] + + def get_named_quantities(self, term: str): + if term in self.df.keys(): + return self.df[term] + return None + + def get_reconstructed_positions(self): + """Read xyz columns.""" + + xyz = NxField() + xyz.typed_value = 
np.zeros( + [self.number_of_events, 3], np.float32) + xyz.unit = "nm" + + dim = 0 + for quant in ["x (nm)", "y (nm)", "z (nm)"]: + xyz.typed_value[:, dim] = np.asarray(self.get_named_quantities(quant), np.float32) + dim += 1 + return xyz + + def get_mass_to_charge_state_ratio(self): + """Read (calibrated) mass-to-charge-state-ratio column.""" + + m_n = NxField() + m_n.typed_value = np.zeros( + [self.number_of_events, 1], np.float32) + m_n.unit = "Da" + + m_n.typed_value[:, 0] = np.asarray(self.get_named_quantities("mc_c (Da)"), np.float32) + return m_n + + +class ReadPyccaptRangingFileFormat(): + """Read FAU/Erlangen pyccapt (ranging module) HDF5 file format.""" + + def __init__(self, filename: str): + assert len(filename) > 2, "H5 file incorrect filename ending!" + assert filename.lower().endswith(".h5") or filename.lower().endswith(".hdf5"), \ + "HDF5 file incorrect file type!" + self.filename = filename + + self.filesize = os.path.getsize(self.filename) + self.number_of_events = None + self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e" + self.df = None + + with h5py.File(self.filename, "r") as h5r: + self.supported = 0 # voting-based + required_entries = ["df", + "df/axis0", "df/axis1", + "df/block0_items", "df/block0_values", + "df/block1_items", "df/block1_values", + "df/block2_items", "df/block2_values"] + for entry in required_entries: + if entry in h5r.keys(): + self.supported += 1 + if self.supported == 9: + print(f"{self.filename} is a supported pyccapt/ranging HDF5 file!") + else: + print(f"{self.filename} is not a supported pyccapt/ranging HDF5 file!") + return + + self.df = pd.read_hdf(self.filename) + self.rng = {} + self.rng["molecular_ions"] = [] + print(np.shape(self.df)[0]) + for idx in np.arange(0, np.shape(self.df)[0]): + if isinstance(self.df.iloc[idx, 6], str) is True: + if self.df.iloc[idx, 6] == "unranged": + continue + + elements = self.df.iloc[idx, 6] + complexs = self.df.iloc[idx, 7] + isotopes = self.df.iloc[idx, 8] + # assertions 
+ ivec = np.zeros((MAX_NUMBER_OF_ATOMS_PER_ION,), np.uint16) + hashvector = [] + for idxj in np.arange(0, len(elements)): + symbol = elements[idxj] + if symbol in chemical_symbols and symbol != "X": + proton_number = atomic_numbers[symbol] + neutron_number = isotopes[idxj] - proton_number + for mult in np.arange(0, complexs[idxj]): + hashvector.append(isotope_to_hash(proton_number, neutron_number)) + ivec[0:len(hashvector)] = np.sort(np.asarray(hashvector, np.uint16), kind="stable")[::-1] + + m_ion = NxIon() + m_ion.isotope_vector.typed_value = ivec + m_ion.nuclid_list.typed_value = isotope_vector_to_nuclid_list(ivec) + m_ion.charge_state.typed_value = np.int8(self.df.iloc[idx, 9]) + m_ion.add_range(self.df.iloc[idx, 3], self.df.iloc[idx, 4]) + m_ion.update_human_readable_name() + # m_ion.report() + self.rng["molecular_ions"].append(m_ion) + print(f"{self.filename} parsed successfully") diff --git a/pyproject.toml b/pyproject.toml index 1b00af1..e3c0817 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,8 @@ classifiers = [ dependencies = [ "h5py>=3.6.0", "numpy>=1.21.2", - "pandas>=1.3.2", + "pandas", + "tables", "ase>=3.19.0", "radioactivedecay>=0.4.16" ] diff --git a/tests/TestsForDevelopers.ipynb b/tests/TestsForDevelopers.ipynb index b7e0e19..67fb6ee 100644 --- a/tests/TestsForDevelopers.ipynb +++ b/tests/TestsForDevelopers.ipynb @@ -30,6 +30,7 @@ "import os\n", "import numpy as np\n", "import h5py\n", + "import pandas as pd\n", "from jupyterlab_h5web import H5Web\n", "from ifes_apt_tc_data_modeling.utils.utils import create_isotope_vector, \\\n", " isotope_vector_to_dict_keyword, isotope_vector_to_human_readable_name, \\\n", @@ -41,6 +42,76 @@ "from ase.data import atomic_numbers, atomic_masses, chemical_symbols" ] }, + { + "cell_type": "markdown", + "id": "e680a9d0-4005-40b0-ab45-a8f5fb215273", + "metadata": {}, + "source": [ + "## FAU/Erlangen pyccapt control/calibration/ranging module" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "id": "2378059c-dae4-46e8-88b0-9fd1ac8a5a70", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "prefix = f\"{os.getcwd()}/../../../../../paper_paper_paper/joss_nomad_apt/bb_analysis/data\"\n", + "fnm = [\"1748_Al.h5\",\n", + " \"1748_Al_range_.h5\",\n", + " \"1748_Nov-14-2023_13-31_Al.h5\"]\n", + "# df = pd.read_hdf(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")\n", + "# H5Web(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")\n", + "# df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "319c3c28-8d4d-46a5-bb73-7a8396d84020", + "metadata": {}, + "outputs": [], + "source": [ + "from ifes_apt_tc_data_modeling.pyccapt.pyccapt_reader import ReadPyccaptControlFileFormat, ReadPyccaptCalibrationFileFormat, ReadPyccaptRangingFileFormat" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac6b2e02-371b-47ff-9c68-b23b6e1b4b83", + "metadata": {}, + "outputs": [], + "source": [ + "pyc_r = ReadPyccaptRangingFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7178b8b-51f7-4270-acbe-77844692477b", + "metadata": {}, + "outputs": [], + "source": [ + "pyc_m = ReadPyccaptControlFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[2]}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1f1c4d8-75fe-4884-b81a-338a6c5eab94", + "metadata": {}, + "outputs": [], + "source": [ + "pyc_c = ReadPyccaptCalibrationFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[0]}\")\n", + "xyz = pyc_c.get_reconstructed_positions()\n", + "print(xyz.typed_value)\n", + "m_q = pyc_c.get_mass_to_charge_state_ratio()\n", + "print(m_q.typed_value)" + ] + }, { "cell_type": "markdown", "id": "52294143-78c7-47bf-b39e-9e40eec3999d",