Skip to content

Commit

Permalink
Merge pull request #11 from atomprobe-tc/hdl_duplicated_rng_rrng
Browse files Browse the repository at this point in the history
Dealing with duplicated ranging definitions in RRNG
  • Loading branch information
mkuehbach authored Apr 26, 2024
2 parents cf56ecf + d69a2dd commit 00d501d
Show file tree
Hide file tree
Showing 12 changed files with 1,948 additions and 45 deletions.
16 changes: 4 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,12 @@ Please consider this if you run into issues when continuing with this manual.
### Install the ifes_apt_tc_data_modeling modules as a user

```
git clone git@github.com:atomprobe-tc/ifes_apt_tc_data_modeling.git
git clone https://www.github.com/atomprobe-tc/ifes_apt_tc_data_modeling.git
cd ifes_apt_tc_data_modeling
python -m pip install --upgrade pip
python -m pip install pip-tools
python -m pip install -e .
python -m pip install -e ".[dev]"
python -m pip list
```

### Additional steps to perform when you are a developer

```
python -m pip install -e ".[dev]"
python -m pip list
jupyter-lab
```

Expand All @@ -64,13 +56,13 @@ of files in respective formats. Pieces of information about the content and form
(e.g. in the books by D. Larson et al. https://doi.org/10.1007/978-1-4614-8721-0 or B. Gault et al. http://dx.doi.org/10.1007/978-1-4614-3436-8 ).
Atom probers like D. Haley have contributed substantially through raising awareness of the issue within the community.

AMETEK/Cameca is the key technology partner in atom probe. AMETEK has developed an open file format called APT which has improved
AMETEK/Cameca is the key technology partner in atom probe. AMETEK/Cameca has developed an open file format called APT which has improved
the accessibility of specific numerical data and some metadata. Individuals like M. Kühbach have driven the implementation and
communication of parsers for this APT file format. There are ongoing efforts by both AMETEK and the scientific community to extent the APT file format
communication of parsers for this APT file format. There are ongoing efforts by both AMETEK/Cameca and the scientific community to extend the APT file format
with additional metadata. The main motivation behind these newer efforts is to improve the interoperability between research data collected
within the IVAS/APSuite software and third-party software including research data management systems.
Currently, most metadata have to be entered manually via e.g. electronic lab notebooks if one were to use or register atom probe
data in solutions other than those developed by AMETEK.
data in solutions other than those developed by AMETEK/Cameca.

Nowadays, there is a global desire, a push by research funding agencies, and an increased interest of atom probers
to make their research data and knowledge generation process better matching and more completely aligned to the aims
Expand Down
45 changes: 42 additions & 3 deletions dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,9 @@ charset-normalizer==3.1.0
click==8.1.7
# via pip-tools
comm==0.1.3
# via ipykernel
# via
# ipykernel
# ipywidgets
contourpy==1.0.7
# via matplotlib
cryptography==40.0.2
Expand Down Expand Up @@ -88,16 +90,23 @@ importlib-metadata==6.6.0
# twine
ipykernel==6.22.0
# via
# jupyter
# jupyter-console
# nbclassic
# notebook
# qtconsole
ipython==8.12.1
# via
# ipykernel
# ipywidgets
# jupyter-console
# jupyterlab
ipython-genutils==0.2.0
# via
# nbclassic
# notebook
ipywidgets==8.1.2
# via jupyter
isort==5.13.2
# via pylint
jaraco-classes==3.2.3
Expand All @@ -122,24 +131,32 @@ jsonschema==4.17.3
# via
# jupyterlab-server
# nbformat
jupyter==1.0.0
# via ifes_apt_tc_data_modeling (pyproject.toml)
jupyter-client==8.2.0
# via
# ipykernel
# jupyter-console
# jupyter-server
# nbclassic
# nbclient
# notebook
# qtconsole
jupyter-console==6.6.3
# via jupyter
jupyter-core==5.3.0
# via
# ipykernel
# jupyter-client
# jupyter-console
# jupyter-server
# jupyterlab
# nbclassic
# nbclient
# nbconvert
# nbformat
# notebook
# qtconsole
jupyter-server==1.24.0
# via
# jupyterlab
Expand All @@ -155,6 +172,8 @@ jupyterlab-pygments==0.2.2
# via nbconvert
jupyterlab-server==2.22.1
# via jupyterlab
jupyterlab-widgets==3.0.10
# via ipywidgets
keyring==23.13.1
# via twine
kiwisolver==1.4.4
Expand Down Expand Up @@ -197,6 +216,7 @@ nbclient==0.7.4
# via nbconvert
nbconvert==7.3.1
# via
# jupyter
# jupyter-server
# nbclassic
# notebook
Expand All @@ -217,7 +237,9 @@ nest-asyncio==1.5.6
networkx==3.1
# via radioactivedecay
notebook==6.5.4
# via jupyterlab
# via
# jupyter
# jupyterlab
notebook-shim==0.2.3
# via nbclassic
numexpr==2.8.7
Expand Down Expand Up @@ -248,6 +270,8 @@ packaging==23.1
# jupyterlab-server
# matplotlib
# nbconvert
# qtconsole
# qtpy
# tables
pandas==2.0.1
# via ifes_apt_tc_data_modeling (pyproject.toml)
Expand Down Expand Up @@ -275,7 +299,9 @@ prometheus-client==0.16.0
# nbclassic
# notebook
prompt-toolkit==3.0.38
# via ipython
# via
# ipython
# jupyter-console
psutil==5.9.5
# via ipykernel
ptyprocess==0.7.0
Expand All @@ -295,7 +321,9 @@ pycparser==2.21
pygments==2.15.1
# via
# ipython
# jupyter-console
# nbconvert
# qtconsole
# readme-renderer
# rich
pylint==3.0.3
Expand All @@ -319,9 +347,15 @@ pyzmq==25.0.2
# via
# ipykernel
# jupyter-client
# jupyter-console
# jupyter-server
# nbclassic
# notebook
# qtconsole
qtconsole==5.5.1
# via jupyter
qtpy==2.4.1
# via qtconsole
radioactivedecay==0.4.17
# via ifes_apt_tc_data_modeling (pyproject.toml)
readme-renderer==37.3
Expand Down Expand Up @@ -390,7 +424,9 @@ traitlets==5.9.0
# comm
# ipykernel
# ipython
# ipywidgets
# jupyter-client
# jupyter-console
# jupyter-core
# jupyter-server
# matplotlib-inline
Expand All @@ -399,6 +435,7 @@ traitlets==5.9.0
# nbconvert
# nbformat
# notebook
# qtconsole
twine==4.0.2
# via ifes_apt_tc_data_modeling (pyproject.toml)
typing-extensions==4.9.0
Expand All @@ -419,6 +456,8 @@ websocket-client==1.5.1
# via jupyter-server
wheel==0.43.0
# via pip-tools
widgetsnbextension==4.0.10
# via ipywidgets
xmltodict==0.13.0
# via ifes_apt_tc_data_modeling (pyproject.toml)
zipp==3.15.0
Expand Down
66 changes: 66 additions & 0 deletions ifes_apt_tc_data_modeling/nexus/nx_ion.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,72 @@
from ifes_apt_tc_data_modeling.nexus.nx_field import NxField


def try_to_reduce_to_unique_definitions(inp: list) -> list:
    """Try to reduce a set of (molecular) ion definitions to unique ones.

    Two definitions are considered duplicates when the first
    MAX_NUMBER_OF_ATOMS_PER_ION entries of their nuclide_hash are identical
    and their mass-to-charge-state intervals overlap or touch. Only the
    first range of each ion (ranges.values[0, :]) is inspected.

    Duplicates are replaced by a single new NxIon (charge_state=0) that
    carries the nuclide_hash of the first member and the joined interval
    [min of all lower bounds, max of all upper bounds]. Its comment records
    the indices (positions in inp) of the definitions merged into it.
    Definitions without duplicates are passed through as the same objects.

    Overlapping ranges of *different* ion types are left untouched; from a
    scientific point of view these make the ranging ambiguous, but they
    cannot be resolved here.

    Parameters
    ----------
    inp : list
        List of NxIon instances.

    Returns
    -------
    list
        NxIon instances in order of first appearance in inp.

    Raises
    ------
    ValueError
        If any entry of inp is not an NxIon.
    """
    for entry in inp:
        if not isinstance(entry, NxIon):
            raise ValueError(
                "Argument inp to try_to_reduce_to_unique_definitions "
                "needs to be a list of NxIon!")

    n_ions = len(inp)
    visited = [False] * n_ions
    unique = []
    for idx in range(n_ions):
        if visited[idx]:
            continue
        visited[idx] = True
        seed_hash = inp[idx].nuclide_hash.values
        mqmin = inp[idx].ranges.values[0, 0]
        mqmax = inp[idx].ranges.values[0, 1]

        # Collect every not yet consumed definition of the same ion type
        # whose interval overlaps or touches the (growing) joined interval.
        # Repeat until nothing new is absorbed, so that chains of overlaps
        # (A overlaps B, B overlaps C, but A does not overlap C) end up in
        # one joined definition instead of leaving overlapping leftovers.
        isect = []
        grown = True
        while grown:
            grown = False
            for jdx in range(n_ions):
                if visited[jdx]:
                    continue
                lower = inp[jdx].ranges.values[0, 0]
                upper = inp[jdx].ranges.values[0, 1]
                if mqmax < lower or mqmin > upper:
                    # strictly disjoint, touching intervals do not get here
                    continue
                # identical nuclide_hash is necessary, otherwise the
                # intervals belong to different ion types
                other_hash = inp[jdx].nuclide_hash.values
                if not all(seed_hash[i] == other_hash[i]
                           for i in range(MAX_NUMBER_OF_ATOMS_PER_ION)):
                    continue
                visited[jdx] = True
                isect.append(jdx)
                mqmin = min(mqmin, lower)
                mqmax = max(mqmax, upper)
                grown = True

        if not isect:
            # no duplicates, accept this candidate as is
            unique.append(inp[idx])
            continue

        isect.sort()
        joined_ion = NxIon(nuclide_hash=seed_hash, charge_state=0)
        joined_ion.add_range(mqmin, mqmax)
        joined_ion.comment.values = f"{inp[idx].comment.values} was combined with {isect}"
        unique.append(joined_ion)
    return unique


class NxIon():
"""Representative of a NeXus base class NXion."""

Expand Down
35 changes: 25 additions & 10 deletions ifes_apt_tc_data_modeling/rrng/rrng_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,13 @@
import re
import numpy as np

from ase.data import chemical_symbols
from ifes_apt_tc_data_modeling.nexus.nx_ion import NxField, NxIon
from ifes_apt_tc_data_modeling.nexus.nx_ion import NxField, NxIon, \
try_to_reduce_to_unique_definitions
from ifes_apt_tc_data_modeling.utils.utils import \
create_nuclide_hash, is_range_significant
from ifes_apt_tc_data_modeling.utils.definitions import MQ_EPSILON
from ifes_apt_tc_data_modeling.utils.molecular_ions import \
get_chemical_symbols


def evaluate_rrng_range_line(i: int, line: str) -> dict:
Expand All @@ -47,13 +49,10 @@ def evaluate_rrng_range_line(i: int, line: str) -> dict:
tmp = re.split(r"[\s=]+", line)
if len(tmp) < 6:
# raise ValueError(f"Line {line} does not contain all required fields {len(tmp)}!")
return info
if tmp[0] != f"Range{i}":
# raise ValueError(f"Line {line} has inconsistent line prefix {tmp[0]}!")
return info
return None
if is_range_significant(np.float64(tmp[1]), np.float64(tmp[2])) is False:
# raise ValueError(f"Line {line} insignificant range!")
return info
return None
info["range"] = np.asarray([tmp[1], tmp[2]], np.float64)

if tmp[3].lower().startswith("vol:"):
Expand Down Expand Up @@ -81,7 +80,7 @@ def evaluate_rrng_range_line(i: int, line: str) -> dict:
elif element_multiplicity[0].lower() not in ["vol", "color"]:
# pick up what is an element name
symbol = element_multiplicity[0]
if (symbol not in chemical_symbols) or (symbol == "X"):
if symbol not in get_chemical_symbols():
# raise ValueError(f"WARNING::Line {line} contains an invalid chemical symbol {symbol}!")
return info
# if np.uint32(element_multiplicity[1]) <= 0:
Expand All @@ -98,7 +97,7 @@ def evaluate_rrng_range_line(i: int, line: str) -> dict:
class ReadRrngFileFormat():
"""Read *.rrng file format."""

def __init__(self, file_path: str):
def __init__(self, file_path: str, unique=False, verbose=False):
"""Initialize the class."""
if (len(file_path) <= 5) or (file_path.lower().endswith(".rrng") is False):
raise ImportError("WARNING::RRNG file incorrect file_path ending or file type!")
Expand All @@ -107,6 +106,8 @@ def __init__(self, file_path: str):
"ranges": {},
"ions": {},
"molecular_ions": []}
self.unique = unique
self.verbose = verbose
self.read_rrng()

def read_rrng(self):
Expand Down Expand Up @@ -178,7 +179,10 @@ def read_rrng(self):
raise ValueError(f"Line {txt_stripped[current_line_id]} No ranges defined!")
current_line_id += 1

m_ions = []
for jdx in np.arange(0, number_of_ranges):
if self.verbose:
print(f"{txt_stripped[current_line_id + jdx]}")
dct = evaluate_rrng_range_line(jdx + 1, txt_stripped[current_line_id + jdx])
if dct is None:
print(f"WARNING::RRNG line {txt_stripped[current_line_id + jdx]} is corrupted!")
Expand All @@ -187,8 +191,19 @@ def read_rrng(self):
m_ion = NxIon(nuclide_hash=create_nuclide_hash(dct["atoms"]), charge_state=0)
m_ion.add_range(dct["range"][0], dct["range"][1])
m_ion.comment = NxField(dct["name"], "")
m_ions.append(m_ion)
# this set may contain duplicates or overlapping ranges if ranging definitions are ambiguous like here https://doi.org/10.5281/zenodo.7788883

if self.unique:
unique_m_ions = try_to_reduce_to_unique_definitions(m_ions)
print(f"Found {len(m_ions)} ranging definitions, performed reduction to {len(unique_m_ions)} unique ones")
else:
unique_m_ions = m_ions.copy()
print(f"Found {len(m_ions)} ranging definitions, no reduction, {len(unique_m_ions)} remain.")
del m_ions

for m_ion in unique_m_ions:
m_ion.apply_combinatorics()
# m_ion.report()

self.rrng["molecular_ions"].append(m_ion)
print(f"{self.file_path} parsed successfully")
2 changes: 1 addition & 1 deletion ifes_apt_tc_data_modeling/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def is_range_overlapping(interval: np.ndarray,
def is_range_significant(left: np.float64, right: np.float64) -> bool:
    """Check if inclusive interval bounds [left, right] span a finite range.

    Returns True only if both bounds are non-negative and the width
    (right - left) is at least MQ_EPSILON, otherwise False.
    """
    if (np.float64(0.) <= left) and (np.float64(0.) <= right):
        # a width of exactly MQ_EPSILON still counts as significant
        if (right - left) >= MQ_EPSILON:
            return True
    return False

Expand Down
Loading

0 comments on commit 00d501d

Please sign in to comment.