Skip to content

Commit

Permalink
Merge pull request #2 from atomprobe-tc/add_ato_and_pyccapt_support
Browse files Browse the repository at this point in the history
Release 3.11 constraint and added ATO v3, v5, and pyccapt file format support
  • Loading branch information
atomprobe-tc authored Nov 28, 2023
2 parents a96332e + cb6fe0b commit de66542
Show file tree
Hide file tree
Showing 8 changed files with 530 additions and 23 deletions.
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@ Foster exchange about data models and work towards specifications
of file formats from the research field of atom probe microscopy.

# Getting started
You should create a virtual environment. We tested on Ubuntu with Python 3.8.
Newer versions of Python should work similarly when using the desired version tag.
You should create a virtual environment. We tested on Ubuntu with Python 3.8 and newer versions.
In what follows the version (tag) 3.8 is a placeholder whereby we show how to proceed when using
Python 3.8. Using newer versions of Python should work the same by replacing 3.8 with the respective
version (tag).

If you don't have Python 3.8 installed on your computer, follow these commands:
Older versions of Python like 3.8 and 3.9 are available e.g. via the deadsnakes repository or via
conda. For using deadsnakes proceed with the following commands:
```
sudo add-apt-repository ppa:deadsnakes/ppa
sudo apt install python3.8 python3-dev libpython3.8-dev python3.8-venv
Expand Down
33 changes: 21 additions & 12 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.9
# This file is autogenerated by pip-compile with Python 3.11
# by the following command:
#
# pip-compile --extra=dev --output-file=dev-requirements.txt --resolver=backtracking pyproject.toml
# pip-compile --extra=dev --output-file=dev-requirements.txt pyproject.toml
#
anyio==3.6.2
# via jupyter-server
Expand All @@ -29,6 +29,8 @@ bleach==6.0.0
# via
# nbconvert
# readme-renderer
blosc2==2.3.1
# via tables
certifi==2022.12.7
# via requests
cffi==1.15.1
Expand Down Expand Up @@ -72,13 +74,8 @@ idna==3.4
# requests
importlib-metadata==6.6.0
# via
# jupyter-client
# jupyterlab-server
# keyring
# nbconvert
# twine
importlib-resources==5.12.0
# via matplotlib
ipykernel==6.22.0
# via
# nbclassic
Expand Down Expand Up @@ -172,6 +169,8 @@ more-itertools==9.1.0
# via jaraco-classes
mpmath==1.3.0
# via sympy
msgpack==1.0.7
# via blosc2
nbclassic==0.5.6
# via
# jupyterlab
Expand All @@ -190,6 +189,8 @@ nbformat==5.8.0
# nbclient
# nbconvert
# notebook
ndindex==1.7
# via blosc2
nest-asyncio==1.5.6
# via
# ipykernel
Expand All @@ -201,17 +202,22 @@ notebook==6.5.4
# via jupyterlab
notebook-shim==0.2.3
# via nbclassic
numexpr==2.8.7
# via tables
numpy==1.24.3
# via
# ase
# blosc2
# contourpy
# h5grove
# h5py
# ifes-apt-tc-data-modeling (pyproject.toml)
# matplotlib
# numexpr
# pandas
# radioactivedecay
# scipy
# tables
# tifffile
orjson==3.8.11
# via h5grove
Expand All @@ -223,6 +229,7 @@ packaging==23.1
# jupyterlab-server
# matplotlib
# nbconvert
# tables
pandas==2.0.1
# via ifes-apt-tc-data-modeling (pyproject.toml)
pandocfilters==1.5.0
Expand Down Expand Up @@ -254,6 +261,10 @@ ptyprocess==0.7.0
# terminado
pure-eval==0.2.2
# via stack-data
py-cpuinfo==9.0.0
# via
# blosc2
# tables
pycparser==2.21
# via cffi
pygments==2.15.1
Expand Down Expand Up @@ -319,6 +330,8 @@ stack-data==0.6.2
# via ipython
sympy==1.11.1
# via radioactivedecay
tables==3.9.2
# via ifes-apt-tc-data-modeling (pyproject.toml)
terminado==0.17.1
# via
# jupyter-server
Expand Down Expand Up @@ -355,8 +368,6 @@ traitlets==5.9.0
# notebook
twine==4.0.2
# via ifes-apt-tc-data-modeling (pyproject.toml)
typing-extensions==4.5.0
# via ipython
tzdata==2023.3
# via pandas
urllib3==1.26.15
Expand All @@ -372,9 +383,7 @@ webencodings==0.5.1
websocket-client==1.5.1
# via jupyter-server
zipp==3.15.0
# via
# importlib-metadata
# importlib-resources
# via importlib-metadata

# The following packages are considered to be unsafe in a requirements file:
# setuptools
122 changes: 122 additions & 0 deletions ifes_apt_tc_data_modeling/ato/ato_reader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# ATO file format reader used by atom probe microscopists.
#
# Copyright The NOMAD Authors.
#
# This file is part of NOMAD. See https://nomad-lab.eu for further info.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# pylint: disable=no-member,duplicate-code

import os

import numpy as np

from ifes_apt_tc_data_modeling.nexus.nx_field import NxField
from ifes_apt_tc_data_modeling.utils.mmapped_io import get_memory_mapped_data


class ReadAtoFileFormat():
    """Read Rouen group *.ato file format.

    Supported are versions 3 and 5 of the format. The reader treats a file
    as a fixed-size header followed by fixed-size records, one per event:

    * v3: 2 * 4 B header, 14 * 4 B records; x, y, z and the
      mass-to-charge-state ratio are read as little-endian float32 values
      at byte offsets 0, 4, 8 and 12 of each record.
    * v5: 5000 B header, 40 B records; x and y are read as little-endian
      int16 values at byte offsets 0 and 2 (scaled by 0.1), z and the
      mass-to-charge-state ratio as little-endian float32 values at byte
      offsets 4 and 8.

    Attributes set by the constructor:
        filename: path of the file as passed in.
        filesize: size of the file in bytes.
        version: ATO version read from the header (3 or 5).
        number_of_events: number of records in the file (np.uint32).
    """

    # Header and record sizes in bytes for each supported version.
    _V3_HEADER_BYTES = 2 * 4
    _V3_RECORD_BYTES = 14 * 4
    _V5_HEADER_BYTES = 5000
    _V5_RECORD_BYTES = 40

    def __init__(self, filename: str):
        """Inspect *filename* and determine version and number of events.

        Raises:
            AssertionError: if the filename does not end with ".ato" or the
                payload is not an integer multiple of the record size.
            ValueError: if the version is not 3 or 5, or the file is smaller
                than the header of its version.
        """
        assert len(filename) > 4, "ATO file incorrect filename ending!"
        assert filename.lower().endswith(".ato"), \
            "ATO file incorrect file type!"
        self.filename = filename

        self.filesize = os.path.getsize(self.filename)
        self.number_of_events = None
        self.version = None
        retval = self.get_ato_version()
        if retval not in (3, 5):
            # there also seems to exist a version 4 but I have never seen
            # an example for it
            raise ValueError("ATO file unsupported version!")
        self.version = retval
        print(f"ATO file is in a supported version {self.version}")

        header_bytes, record_bytes = self._layout()
        payload_bytes = self.filesize - header_bytes
        if payload_bytes < 0:
            # a negative payload could still pass the modulo check below
            raise ValueError("ATO file is smaller than its header!")
        # NOTE: the modulus must be the full record size; the parentheses
        # matter because % and * have the same precedence in Python
        if self.version == 3:
            assert payload_bytes % record_bytes == 0, \
                "ATO v3 filesize not integer multiple of 14*4B!"
        else:
            assert payload_bytes % record_bytes == 0, \
                "ATO v5 filesize not integer multiple of 40B!"
        # integer division avoids a detour through floating point
        self.number_of_events = np.uint32(payload_bytes // record_bytes)
        print(f"ATO file contains {self.number_of_events} entries")
        # https://zenodo.org/records/8382828
        # details three versions of the Rouen/GPM ato format v3, v4, v5
        # Cameca/AMETEK's runrootl/FileConvert utility know two ATO flavours:
        # CamecaRoot v18.46.533g built March 21, 2022 against ROOT 5.34/36
        # v3 LAWATOP and v5 current GPM
        # specifically an earlier parser
        # https://hg.sr.ht/~mycae/libatomprobe/browse/src/io/dataFiles.cpp?rev=tip
        # mentions that storage format may not be robust enough against
        # overflow and suggests that additional polishing of results is needed

    def _layout(self):
        """Return (header size, record size) in bytes for self.version."""
        if self.version == 3:
            return self._V3_HEADER_BYTES, self._V3_RECORD_BYTES
        return self._V5_HEADER_BYTES, self._V5_RECORD_BYTES

    def _read_column(self, dtype: str, byte_offset: int):
        """Read one value of *dtype* per record at *byte_offset* in the record."""
        header_bytes, record_bytes = self._layout()
        return get_memory_mapped_data(self.filename, dtype,
                                      header_bytes + byte_offset,
                                      record_bytes,
                                      self.number_of_events)

    def get_ato_version(self):
        """Return the version stored in the header if it is 3, 4, or 5, else None.

        The version is taken from the second little-endian uint32 of the file.
        """
        header = get_memory_mapped_data(self.filename, "<u4", 0, 4, 2)
        if header[1] in [3, 4, 5]:
            return header[1]
        return None

    def get_reconstructed_positions(self):
        """Read xyz columns.

        Returns an NxField whose typed_value is a float32 array with one row
        per event and three columns, and whose unit is set to "nm".
        """
        xyz = NxField()
        xyz.typed_value = np.zeros(
            [self.number_of_events, 3], np.float32)
        xyz.unit = "nm"

        if self.version == 3:
            # wpx -> x, wpy -> y, fpz -> z
            for dim in (0, 1, 2):
                xyz.typed_value[:, dim] = self._read_column("<f4", dim * 4)
        if self.version == 5:
            # publicly available sources are inconclusive whether coordinates
            # are in angstroem or nm
            # based on the evidence of usa_denton_smith Si.epos converted to
            # v5 ATO via CamecaRoot the resulting x, y coordinates suggests
            # that v5 ATO stores in angstroem, while fpz is stored in nm?
            # however https://zenodo.org/records/8382828 reports the
            # reconstructed positions to be named not at all wpx, wpy and fpz
            # but x, y, z instead and here claims the nm
            # NOTE(review): the factor 0.1 presumably converts angstroem to
            # nm for x and y -- confirm against a reference dataset
            xyz.typed_value[:, 0] = \
                np.float32(self._read_column("<i2", 0) * 0.1)  # wpx -> x
            xyz.typed_value[:, 1] = \
                np.float32(self._read_column("<i2", 2) * 0.1)  # wpy -> y
            xyz.typed_value[:, 2] = \
                self._read_column("<f4", 4)  # fpz -> z
        return xyz

    def get_mass_to_charge_state_ratio(self):
        """Read mass-to-charge-state-ratio column.

        Returns an NxField whose typed_value is a float32 array with one row
        per event and a single column, and whose unit is set to "Da".
        """
        m_n = NxField()
        m_n.typed_value = np.zeros(
            [self.number_of_events, 1], np.float32)
        m_n.unit = "Da"

        if self.version == 3:
            m_n.typed_value[:, 0] = self._read_column("<f4", 3 * 4)
        if self.version == 5:
            m_n.typed_value[:, 0] = self._read_column("<f4", 8)
        return m_n
Loading

0 comments on commit de66542

Please sign in to comment.