Skip to content

Commit

Permalink
Added support for pyccapt file formats and tested successfully with M…
Browse files Browse the repository at this point in the history
…ehrpad's example
  • Loading branch information
atomprobe-tc committed Nov 28, 2023
1 parent c7edc5a commit cb6fe0b
Show file tree
Hide file tree
Showing 5 changed files with 304 additions and 2 deletions.
18 changes: 18 additions & 0 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ bleach==6.0.0
# via
# nbconvert
# readme-renderer
blosc2==2.3.1
# via tables
certifi==2022.12.7
# via requests
cffi==1.15.1
Expand Down Expand Up @@ -167,6 +169,8 @@ more-itertools==9.1.0
# via jaraco-classes
mpmath==1.3.0
# via sympy
msgpack==1.0.7
# via blosc2
nbclassic==0.5.6
# via
# jupyterlab
Expand All @@ -185,6 +189,8 @@ nbformat==5.8.0
# nbclient
# nbconvert
# notebook
ndindex==1.7
# via blosc2
nest-asyncio==1.5.6
# via
# ipykernel
Expand All @@ -196,17 +202,22 @@ notebook==6.5.4
# via jupyterlab
notebook-shim==0.2.3
# via nbclassic
numexpr==2.8.7
# via tables
numpy==1.24.3
# via
# ase
# blosc2
# contourpy
# h5grove
# h5py
# ifes-apt-tc-data-modeling (pyproject.toml)
# matplotlib
# numexpr
# pandas
# radioactivedecay
# scipy
# tables
# tifffile
orjson==3.8.11
# via h5grove
Expand All @@ -218,6 +229,7 @@ packaging==23.1
# jupyterlab-server
# matplotlib
# nbconvert
# tables
pandas==2.0.1
# via ifes-apt-tc-data-modeling (pyproject.toml)
pandocfilters==1.5.0
Expand Down Expand Up @@ -249,6 +261,10 @@ ptyprocess==0.7.0
# terminado
pure-eval==0.2.2
# via stack-data
py-cpuinfo==9.0.0
# via
# blosc2
# tables
pycparser==2.21
# via cffi
pygments==2.15.1
Expand Down Expand Up @@ -314,6 +330,8 @@ stack-data==0.6.2
# via ipython
sympy==1.11.1
# via radioactivedecay
tables==3.9.2
# via ifes-apt-tc-data-modeling (pyproject.toml)
terminado==0.17.1
# via
# jupyter-server
Expand Down
1 change: 0 additions & 1 deletion ifes_apt_tc_data_modeling/ato/ato_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
import numpy as np

from ifes_apt_tc_data_modeling.nexus.nx_field import NxField

from ifes_apt_tc_data_modeling.utils.mmapped_io import get_memory_mapped_data


Expand Down
213 changes: 213 additions & 0 deletions ifes_apt_tc_data_modeling/pyccapt/pyccapt_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
# Pyccapt HDF5 file format reader used by atom probe microscopists.
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pylint: disable=no-member,duplicate-code

import os

import h5py

import numpy as np

import pandas as pd

from ase.data import atomic_numbers, chemical_symbols
from ifes_apt_tc_data_modeling.nexus.nx_ion import NxIon
from ifes_apt_tc_data_modeling.nexus.nx_field import NxField
from ifes_apt_tc_data_modeling.utils.utils import \
isotope_to_hash, isotope_vector_to_nuclid_list, MAX_NUMBER_OF_ATOMS_PER_ION

# this implementation focuses on the following state of the pyccapt repository
# https://github.com/mmonajem/pyccapt/commit/e955beb4f2627befb8b4d26f2e74e4c52e00394e

# during the course of an atom probe measurement and analysis with FAU/Erlangen's Oxcart instrument
# several HDF5 files are generated with essentially two software tools. One is pyccapt which has
# a control module, a calibration module (where the voltage/bowl calibration and reconstruction is performed),
# and a module/functionalities to document ranging, i.e. the ion type identification that was made.
# The other software typically used by the FAU/Erlangen atom probe group is Atom Probe Toolbox;
# structured as a set of Matlab live scripts, this toolbox offers data analysis functionalities,
# results are stored via an HDF5 file

# specific comments
# pyccapt/control
# an HDF5 file keeping relevant quantities

# pyccapt/calibration
# unfortunately the generated HDF5 file has internally no provenance information
# with which pyccapt version it was generated, therefore developers of pyccapt should
# rather write the content of the HDF5 file explicitly dset by dset e.g. using h5py instead
# of the pandas HDF5 dump convenience functionality
# of course pandas stores its own version but that is not conclusive enough to infer with
# which pyccapt version and most importantly from which other context the file was generated
# this is an aspect of the FAIR RDM principles which the pyccapt approach currently ignores


class ReadPyccaptControlFileFormat():
    """Read FAU/Erlangen pyccapt (control module) HDF5 file format.

    Constructing an instance only inspects the file: the file size is
    recorded and the presence of the top-level groups "apt", "dld" and "tdc"
    is counted in ``self.supported``. No measurement data are loaded and
    ``self.number_of_events`` stays ``None``.
    """

    def __init__(self, filename: str):
        """Check whether filename is a pyccapt/control HDF5 file.

        Args:
            filename: Path to the HDF5 file; must end (case-insensitively)
                with ".h5" or ".hdf5".

        Raises:
            AssertionError: If the name is shorter than three characters or
                does not have one of the accepted endings.
        """
        assert len(filename) > 2, "H5 file incorrect filename ending!"
        assert filename.lower().endswith((".h5", ".hdf5")), \
            "HDF5 file incorrect file type!"
        self.filename = filename

        self.filesize = os.path.getsize(self.filename)
        self.number_of_events = None
        # pyccapt commit against which this reader was implemented
        self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e"

        # voting-based format check: one vote per expected top-level group
        with h5py.File(self.filename, "r") as h5r:
            top_level_names = h5r.keys()
            self.supported = sum(
                1 for grpnm in ("apt", "dld", "tdc") if grpnm in top_level_names)

        if self.supported != 3:
            print(f"{self.filename} is not a supported pyccapt/control HDF5 file!")
        else:
            print(f"{self.filename} is a supported pyccapt/control HDF5 file!")


class ReadPyccaptCalibrationFileFormat():
    """Read FAU/Erlangen pyccapt (calibration module) HDF5 file format.

    The file is accepted when it contains the entry "df" with the entries
    axis0, axis1 and the items/values pairs of block0 and block1 below it.
    An accepted file is loaded with ``pandas.read_hdf`` into ``self.df`` and
    ``self.number_of_events`` is set to its number of rows. For a rejected
    file both attributes stay ``None``.
    """

    def __init__(self, filename: str):
        """Check and, if supported, load a pyccapt/calibration HDF5 file.

        Args:
            filename: Path to the HDF5 file; must end (case-insensitively)
                with ".h5" or ".hdf5".

        Raises:
            AssertionError: If the name is shorter than three characters or
                does not have one of the accepted endings.
        """
        assert len(filename) > 2, "H5 file incorrect filename ending!"
        assert filename.lower().endswith((".h5", ".hdf5")), \
            "HDF5 file incorrect file type!"
        self.filename = filename

        self.filesize = os.path.getsize(self.filename)
        self.number_of_events = None
        # pyccapt commit against which this reader was implemented
        self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e"
        self.df = None

        with h5py.File(self.filename, "r") as h5r:
            self.supported = 0  # voting-based
            required_entries = ["df",
                                "df/axis0", "df/axis1",
                                "df/block0_items", "df/block0_values",
                                "df/block1_items", "df/block1_values"]
            for entry in required_entries:
                if entry in h5r.keys():
                    self.supported += 1
            # every required entry has to be present, i.e. all votes collected
            if self.supported == len(required_entries):
                print(f"{self.filename} is a supported pyccapt/calibration HDF5 file!")
            else:
                print(f"{self.filename} is not a supported pyccapt/calibration HDF5 file!")
                return

        self.df = pd.read_hdf(self.filename)
        self.number_of_events = np.shape(self.df)[0]

    def get_named_quantities(self, term: str):
        """Return the column named term or None if it is not available.

        None is returned when the table has no such column and also when no
        table was loaded because the file was rejected as unsupported.
        """
        if self.df is None:
            return None
        if term in self.df.keys():
            return self.df[term]
        return None

    def get_reconstructed_positions(self):
        """Read xyz columns.

        Returns:
            NxField with unit "nm" whose typed_value is a float32 array of
            shape (number_of_events, 3) filled from the columns "x (nm)",
            "y (nm)" and "z (nm)".
        """
        xyz = NxField()
        xyz.typed_value = np.zeros(
            [self.number_of_events, 3], np.float32)
        xyz.unit = "nm"

        # NOTE: a missing column is not reported, get_named_quantities returns
        # None for it which numpy converts to NaN for the entire column
        for dim, quant in enumerate(["x (nm)", "y (nm)", "z (nm)"]):
            xyz.typed_value[:, dim] = np.asarray(self.get_named_quantities(quant), np.float32)
        return xyz

    def get_mass_to_charge_state_ratio(self):
        """Read (calibrated) mass-to-charge-state-ratio column.

        Returns:
            NxField with unit "Da" whose typed_value is a float32 array of
            shape (number_of_events, 1) filled from the column "mc_c (Da)".
        """
        m_n = NxField()
        m_n.typed_value = np.zeros(
            [self.number_of_events, 1], np.float32)
        m_n.unit = "Da"

        # NOTE: as for the positions, a missing column silently yields NaN
        m_n.typed_value[:, 0] = np.asarray(self.get_named_quantities("mc_c (Da)"), np.float32)
        return m_n


class ReadPyccaptRangingFileFormat():
    """Read FAU/Erlangen pyccapt (ranging module) HDF5 file format.

    The file is accepted when it contains the entry "df" with the entries
    axis0, axis1 and the items/values pairs of block0, block1 and block2
    below it. An accepted file is loaded with ``pandas.read_hdf`` into
    ``self.df`` and each row that is not flagged "unranged" is converted into
    an NxIon which is appended to ``self.rng["molecular_ions"]``. For a
    rejected file that list stays empty and ``self.df`` stays ``None``.
    """

    def __init__(self, filename: str):
        """Check and, if supported, parse a pyccapt/ranging HDF5 file.

        Args:
            filename: Path to the HDF5 file; must end (case-insensitively)
                with ".h5" or ".hdf5".

        Raises:
            AssertionError: If the name is shorter than three characters or
                does not have one of the accepted endings.
        """
        assert len(filename) > 2, "H5 file incorrect filename ending!"
        assert filename.lower().endswith((".h5", ".hdf5")), \
            "HDF5 file incorrect file type!"
        self.filename = filename

        self.filesize = os.path.getsize(self.filename)
        self.number_of_events = None
        # pyccapt commit against which this reader was implemented
        self.version = "e955beb4f2627befb8b4d26f2e74e4c52e00394e"
        self.df = None
        # created before the format check so that the attribute also exists
        # (with an empty list) when the file turns out to be unsupported
        self.rng = {"molecular_ions": []}

        with h5py.File(self.filename, "r") as h5r:
            self.supported = 0  # voting-based
            required_entries = ["df",
                                "df/axis0", "df/axis1",
                                "df/block0_items", "df/block0_values",
                                "df/block1_items", "df/block1_values",
                                "df/block2_items", "df/block2_values"]
            for entry in required_entries:
                if entry in h5r.keys():
                    self.supported += 1
            # every required entry has to be present, i.e. all votes collected
            if self.supported == len(required_entries):
                print(f"{self.filename} is a supported pyccapt/ranging HDF5 file!")
            else:
                print(f"{self.filename} is not a supported pyccapt/ranging HDF5 file!")
                return

        self.df = pd.read_hdf(self.filename)
        # columns are addressed by position: 3 and 4 are handed to add_range,
        # 6, 7, 8 hold per-element symbols, multiplicities and the values from
        # which the neutron number is computed, 9 is used as the charge state
        for idx in range(np.shape(self.df)[0]):
            elements = self.df.iloc[idx, 6]
            # rows whose element entry is the string "unranged" are skipped
            if isinstance(elements, str) and elements == "unranged":
                continue

            ivec = self._create_isotope_vector(
                elements, self.df.iloc[idx, 7], self.df.iloc[idx, 8])

            m_ion = NxIon()
            m_ion.isotope_vector.typed_value = ivec
            m_ion.nuclid_list.typed_value = isotope_vector_to_nuclid_list(ivec)
            m_ion.charge_state.typed_value = np.int8(self.df.iloc[idx, 9])
            m_ion.add_range(self.df.iloc[idx, 3], self.df.iloc[idx, 4])
            m_ion.update_human_readable_name()
            # m_ion.report()
            self.rng["molecular_ions"].append(m_ion)
        print(f"{self.filename} parsed successfully")

    @staticmethod
    def _create_isotope_vector(elements, complexs, isotopes):
        """Build the zero-padded, descendingly sorted isotope hash vector of one row.

        Entries of elements that are not chemical symbols known to ase, or
        that are the placeholder "X", are ignored. Every other element
        contributes its hash as often as its entry in complexs states.
        NOTE(review): isotopes[i] is presumably the mass number since the
        proton number is subtracted from it to get the neutron number - confirm.
        """
        ivec = np.zeros((MAX_NUMBER_OF_ATOMS_PER_ION,), np.uint16)
        hashvector = []
        for idxj, symbol in enumerate(elements):
            if symbol in chemical_symbols and symbol != "X":
                proton_number = atomic_numbers[symbol]
                neutron_number = isotopes[idxj] - proton_number
                hashvector.extend(
                    isotope_to_hash(proton_number, neutron_number)
                    for _ in np.arange(0, complexs[idxj]))
        # largest hash first, remaining entries of ivec stay zero
        ivec[0:len(hashvector)] = np.sort(np.asarray(hashvector, np.uint16), kind="stable")[::-1]
        return ivec
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ classifiers = [
dependencies = [
"h5py>=3.6.0",
"numpy>=1.21.2",
"pandas>=1.3.2",
"pandas",
"tables",
"ase>=3.19.0",
"radioactivedecay>=0.4.16"
]
Expand Down
71 changes: 71 additions & 0 deletions tests/TestsForDevelopers.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
"import os\n",
"import numpy as np\n",
"import h5py\n",
"import pandas as pd\n",
"from jupyterlab_h5web import H5Web\n",
"from ifes_apt_tc_data_modeling.utils.utils import create_isotope_vector, \\\n",
" isotope_vector_to_dict_keyword, isotope_vector_to_human_readable_name, \\\n",
Expand All @@ -41,6 +42,76 @@
"from ase.data import atomic_numbers, atomic_masses, chemical_symbols"
]
},
{
"cell_type": "markdown",
"id": "e680a9d0-4005-40b0-ab45-a8f5fb215273",
"metadata": {},
"source": [
"## FAU/Erlangen pyccapt control/calibration/ranging module"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2378059c-dae4-46e8-88b0-9fd1ac8a5a70",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"prefix = f\"{os.getcwd()}/../../../../../paper_paper_paper/joss_nomad_apt/bb_analysis/data\"\n",
"fnm = [\"1748_Al.h5\",\n",
" \"1748_Al_range_.h5\",\n",
" \"1748_Nov-14-2023_13-31_Al.h5\"]\n",
"# df = pd.read_hdf(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")\n",
"# H5Web(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")\n",
"# df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "319c3c28-8d4d-46a5-bb73-7a8396d84020",
"metadata": {},
"outputs": [],
"source": [
"from ifes_apt_tc_data_modeling.pyccapt.pyccapt_reader import ReadPyccaptControlFileFormat, ReadPyccaptCalibrationFileFormat, ReadPyccaptRangingFileFormat"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac6b2e02-371b-47ff-9c68-b23b6e1b4b83",
"metadata": {},
"outputs": [],
"source": [
"pyc_r = ReadPyccaptRangingFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[1]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e7178b8b-51f7-4270-acbe-77844692477b",
"metadata": {},
"outputs": [],
"source": [
"pyc_m = ReadPyccaptControlFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[2]}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c1f1c4d8-75fe-4884-b81a-338a6c5eab94",
"metadata": {},
"outputs": [],
"source": [
"pyc_c = ReadPyccaptCalibrationFileFormat(f\"{prefix}/ger_erlangen_pyccapt_format/{fnm[0]}\")\n",
"xyz = pyc_c.get_reconstructed_positions()\n",
"print(xyz.typed_value)\n",
"m_q = pyc_c.get_mass_to_charge_state_ratio()\n",
"print(m_q.typed_value)"
]
},
{
"cell_type": "markdown",
"id": "52294143-78c7-47bf-b39e-9e40eec3999d",
Expand Down

0 comments on commit cb6fe0b

Please sign in to comment.