From 92785b9a74cd5974b96692af4cc436bb4d6c21d2 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Thu, 18 Apr 2024 20:51:06 +0200 Subject: [PATCH 01/30] - Add first implementation of bond length calculation + tests - TODO: move to own subsection --- src/nomad_simulations/model_system.py | 86 ++++++++++++++++++++++++++- tests/test_system.py | 42 +++++++++++++ 2 files changed, 126 insertions(+), 2 deletions(-) create mode 100644 tests/test_system.py diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index e7a99f2a..507a71e8 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -17,8 +17,11 @@ # import re +import ase.geometry import numpy as np import ase +from ase import neighborlist as ase_nl +from ase.geometry import analysis as ase_as from typing import Tuple, Optional from structlog.stdlib import BoundLogger @@ -49,7 +52,7 @@ class GeometricSpace(Entity): """ A base section used to define geometrical spaces and their entities. - """ + """ # TODO: allow input in of the lattice vectors directly length_vector_a = Quantity( type=np.float64, @@ -218,7 +221,7 @@ class Cell(GeometricSpace): type=MEnum('original', 'primitive', 'conventional'), description=""" Representation type of the cell structure. It might be: - - 'original' as in origanally parsed, + - 'original' as in originally parsed, - 'primitive' as the primitive unit cell, - 'conventional' as the conventional cell used for referencing. """, @@ -317,6 +320,47 @@ class AtomicCell(Cell): """, ) + @property + def elements(self): + """ + Returns a list of the unique elements in the `AtomicCell` based on the `atoms_state`. + """ + return list( + set([atom_state.chemical_symbol for atom_state in self.atoms_state]) + ) + + @property + def element_pairs(self): + """ + Returns a list of the unique element pairs in the `AtomicCell` based on the `atoms_state`. + """ + return list( + set( + [ + (a.chemical_symbol, b.chemical_symbol) + for a in self.atoms_state + for b in self.atoms_state + ] + ) + ) + + def get_element_combos( + self, n_elements: int + ) -> list[tuple[str]]: # ! add logger and check for negative n + """ + Returns all unique n-pair combinations of elements. + """ + + def recursion(depth: int, combos: list[list[str]]): + if depth < 1: + return combos + return recursion( + depth - 1, + list(set((*combo, elem) for combo in combos for elem in self.elements)), + ) + + return recursion(n_elements - 1, self.elements) + def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): super().__init__(m_def, m_context, **kwargs) # Set the name of the section @@ -365,6 +409,44 @@ def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: return ase_atoms + def setup_ase_analyze( + self, + ase_atoms: ase.Atoms, + neighbor_list: Optional[ase_nl.NeighborList] = None, + scale_cutoff: float = 3.0, + ) -> None: + """ + Analyzes the `AtomicCell` section using ASE Atoms object. + Return a bonds and angles list. + + Args: + ase_atoms (ase.Atoms): The ASE Atoms object to analyze. + logger (BoundLogger): The logger to log messages. + """ + + if neighbor_list is None: + neighbor_list = ase_nl.NeighborList( + ase_nl.natural_cutoffs(ase_atoms, mult=scale_cutoff), + self_interaction=False, + ) + neighbor_list.update(ase_atoms) # ? should the update always be triggered? + self._ase_analysis = ase_as.Analysis(ase_atoms, nl=neighbor_list) + + def get_bonds( + self, + ) -> list[tuple[int, int]]: # ! write this out to a separate subsection + """ + Returns a list of tuples with the indices of the atoms that are bonded in the `AtomicCell`. + """ + bond_lengths: list[list[float]] = [] + for bonds in self.get_element_combos(2): + bond_lengths.extend( + self._ase_analysis.get_values( + self._ase_analysis.get_bonds(*bonds, unique=True) + ) + ) + return bond_lengths + def normalize(self, archive, logger) -> None: super().normalize(archive, logger) diff --git a/tests/test_system.py b/tests/test_system.py new file mode 100644 index 00000000..85f65fa2 --- /dev/null +++ b/tests/test_system.py @@ -0,0 +1,42 @@ +# +# Copyright The NOMAD Authors. +# +# This file is part of NOMAD. See https://nomad-lab.eu for further info. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import pytest +import numpy as np + +from . import logger +from .conftest import get_scf_electronic_band_gap_template + +from nomad.units import ureg +from nomad_simulations.model_system import AtomicCell, AtomsState + + +def test_geometry_analysis(): + atomic_cell = AtomicCell( + length_vector_a=1.0, + length_vector_b=1.0, + length_vector_c=1.0, + name='H2', + type='original', + positions=np.array([[0, 0.2, 0], [0, 0, 0.1]]) * ureg('angstrom'), + atoms_state=[AtomsState(chemical_symbol='H')] * 2, + ) + atomic_cell.normalize(None, logger) + atomic_cell.setup_ase_analyze(atomic_cell.to_ase_atoms(logger)) + + assert atomic_cell.get_bonds() == [[0.223606797749979]] #! add approx function From e60bceb98e38ad41f4d8d25df53ace95666cfdec Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 19 Apr 2024 16:02:32 +0200 Subject: [PATCH 02/30] Fix typos --- src/nomad_simulations/model_system.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 507a71e8..716c5d83 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -775,7 +775,7 @@ class ChemicalFormula(ArchiveSection): Examples: H2O becomes A2B and H2O2 becomes AB. The letters are drawn from the English alphabet that may be extended by increasing the number of letters: A, B, ..., Z, Aa, Ab - and so on. This definition is in line with the similarly named OPTIMADE definition. + and so on. This definition is in line with the similarly named `OPTIMADE` definition. """, ) @@ -784,7 +784,7 @@ def resolve_chemical_formulas(self, formula: Formula) -> None: Resolves the chemical formulas of the `ModelSystem` in different formats. Args: - formula (Formula): The Formula object from NOMAD atomutils containing the chemical formulas. + formula (Formula): The Formula object from NOMAD `atomutils` containing the chemical formulas. """ self.descriptive = formula.format('descriptive') self.reduced = formula.format('reduced') @@ -836,7 +836,7 @@ class ModelSystem(System): parent-child system trees. The quantities `branch_label`, `branch_depth`, `atom_indices`, and `bond_list` are used to define the parent-child tree. - The normalization is ran in the following order: + The normalization is run in the following order: 1. `OrbitalsState.normalize()` in atoms_state.py under `AtomsState` 2. `CoreHoleState.normalize()` in atoms_state.py under `AtomsState` 3. `HubbardInteractions.normalize()` in atoms_state.py under `AtomsState` @@ -867,7 +867,7 @@ class ModelSystem(System): - Example 5, a passivated heterostructure Si/(GaAs-CO2) has: 1 parent ModelSystem section (for Si/(GaAs-CO2)), 2 child ModelSystem sections (for Si and GaAs-CO2), - and 2 additional children sections in one of the childs (for GaAs and CO2). The number + and 2 additional children sections in one of the children (for GaAs and CO2). The number of AtomicCell and Symmetry sections can be inferred using a combination of example 2 and 3. """ @@ -877,7 +877,7 @@ class ModelSystem(System): name = Quantity( type=str, description=""" - Any verbose naming refering to the ModelSystem. Can be left empty if it is a simple + Any verbose naming referring to the ModelSystem. Can be left empty if it is a simple crystal or it can be filled up. For example, an heterostructure of graphene (G) sandwiched in between hexagonal boron nitrides (hBN) slabs could be named 'hBN/G/hBN'. """, @@ -950,7 +950,7 @@ class ModelSystem(System): branch_depth = Quantity( type=np.int32, description=""" - Index refering to the depth of a branch in the hierarchical `ModelSystem` tree. + Index referring to the depth of a branch in the hierarchical `ModelSystem` tree. """, ) From 6ddc77c4f467a74ee363529834d88910d6d00713 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 19 Apr 2024 16:02:50 +0200 Subject: [PATCH 03/30] Fix typos --- src/nomad_simulations/model_system.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 716c5d83..054d6733 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -614,7 +614,7 @@ def resolve_bulk_symmetry( """ Resolves the symmetry of the material being simulated using MatID and the originally parsed data under original_atomic_cell. It generates two other - `AtomicCell` sections (the primitive and standarized cells), as well as populating + `AtomicCell` sections (the primitive and standardized cells), as well as populating the `Symmetry` section. Args: @@ -623,7 +623,7 @@ def resolve_bulk_symmetry( logger (BoundLogger): The logger to log messages. Returns: primitive_atomic_cell (Optional[AtomicCell]): The primitive `AtomicCell` section. - conventional_atomic_cell (Optional[AtomicCell]): The standarized `AtomicCell` section. + conventional_atomic_cell (Optional[AtomicCell]): The standardized `AtomicCell` section. """ symmetry = {} try: @@ -669,8 +669,8 @@ def resolve_bulk_symmetry( symmetry_analyzer, 'conventional', logger ) - # Getting prototype_formula, prototype_aflow_id, and strukturbericht designation from - # standarized Wyckoff numbers and the space group number + # Getting `prototype_formula`, `prototype_aflow_id`, and strukturbericht designation from + # standardized Wyckoff numbers and the space group number if ( symmetry.get('space_group_number') and conventional_atomic_cell.atoms_state is not None @@ -719,7 +719,7 @@ def normalize(self, archive, logger) -> None: ) = self.resolve_bulk_symmetry(atomic_cell, logger) self.m_parent.m_add_sub_section(ModelSystem.cell, primitive_atomic_cell) self.m_parent.m_add_sub_section(ModelSystem.cell, conventional_atomic_cell) - # Reference to the standarized cell, and if not, fallback to the originally parsed one + # Reference to the standardized cell, and if not, fallback to the originally parsed one self.atomic_cell_ref = self.m_parent.cell[-1] From a1cd476d30d2aa202cc726ca476c2ca2518de911 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 19 Apr 2024 16:15:01 +0200 Subject: [PATCH 04/30] Improve pbc default system\ --- src/nomad_simulations/model_system.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 054d6733..af0c0e3a 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -266,6 +266,7 @@ class Cell(GeometricSpace): periodic_boundary_conditions = Quantity( type=bool, shape=[3], + default=[False, False, False], description=""" If periodic boundary conditions are applied to each direction of the crystal axes. """, @@ -382,11 +383,6 @@ def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: ase_atoms = ase.Atoms(symbols=atoms_labels) # PBC - if self.periodic_boundary_conditions is None: - logger.info( - 'Could not find `AtomicCell.periodic_boundary_conditions`. They will be set to [False, False, False].' - ) - self.periodic_boundary_conditions = [False, False, False] ase_atoms.set_pbc(self.periodic_boundary_conditions) # Positions (ensure they are parsed) From 68b5d32ec7fd47af7eb77cc0d0a6c50d18458111 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Tue, 23 Apr 2024 12:31:29 +0200 Subject: [PATCH 05/30] - Accomodate schema for hosting interatomic distances - Clarify some descriptions in the schema - Update tests - TODO: add angles --- src/nomad_simulations/model_system.py | 113 +++++++++++++++++--------- tests/test_system.py | 6 +- 2 files changed, 76 insertions(+), 43 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index af0c0e3a..8fda8523 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -234,25 +234,35 @@ class Cell(GeometricSpace): """, ) + # topology + bond_list = Quantity( + type=np.int32, + description=""" + List of index pairs corresponding to (covalent) bonds, e.g., as defined by a force field. + """, + ) + + # geometry and analysis positions = Quantity( type=np.float64, shape=['n_cell_points', 3], unit='meter', description=""" - Positions of all the atoms in Cartesian coordinates. + Positions of all the atoms in absolute, Cartesian coordinates. """, - ) + ) # ? @JosePizarro: This is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? velocities = Quantity( type=np.float64, shape=['n_cell_points', 3], unit='meter / second', description=""" - Velocities of the atoms. It is the change in cartesian coordinates of the atom position - with time. + Velocities of the atoms defined with respect to absolute, Cartesian coordinate frame + centered at the respective atom position. """, - ) + ) # ? @JosePizarro: This is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? + # box lattice_vectors = Quantity( type=np.float64, shape=[3, 3], @@ -321,6 +331,25 @@ class AtomicCell(Cell): """, ) + geometry_analysis_cutoffs = Quantity( + type=np.float64, + shape=['*'], + description=""" + Cutoff radius within which atomic geometries are analyzed. + Defaults to 3x the covalent radius denoted by the [`ase` package](https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html). + """, + ) + + interatomic_distances = Quantity( + type=np.float64, + shape=['*', '*'], + unit='meter', + description=""" + Interatomic distances between atoms pairs within in `geometry_analysis_cutoffs`. + Periodic boundary conditions are taken into account. + """, + ) + @property def elements(self): """ @@ -405,12 +434,7 @@ def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: return ase_atoms - def setup_ase_analyze( - self, - ase_atoms: ase.Atoms, - neighbor_list: Optional[ase_nl.NeighborList] = None, - scale_cutoff: float = 3.0, - ) -> None: + def setup_ase_analysis(self) -> None: """ Analyzes the `AtomicCell` section using ASE Atoms object. Return a bonds and angles list. @@ -420,35 +444,50 @@ def setup_ase_analyze( logger (BoundLogger): The logger to log messages. """ - if neighbor_list is None: - neighbor_list = ase_nl.NeighborList( - ase_nl.natural_cutoffs(ase_atoms, mult=scale_cutoff), - self_interaction=False, - ) - neighbor_list.update(ase_atoms) # ? should the update always be triggered? - self._ase_analysis = ase_as.Analysis(ase_atoms, nl=neighbor_list) + neighbor_list = ase_nl.NeighborList( + self.geometry_analysis_cutoffs, + self_interaction=False, + ) + neighbor_list.update(self._ase_atoms) + self._ase_analysis = ase_as.Analysis(self._ase_atoms, nl=neighbor_list) - def get_bonds( - self, + def get_distances( + self, logger: BoundLogger ) -> list[tuple[int, int]]: # ! write this out to a separate subsection """ Returns a list of tuples with the indices of the atoms that are bonded in the `AtomicCell`. """ - bond_lengths: list[list[float]] = [] - for bonds in self.get_element_combos(2): - bond_lengths.extend( + if not hasattr(self, '_ase_analysis'): # ! make this more elegant + logger.error( + 'ASE analysis not set up. Run `setup_ase_analysis` before calling this method.' + ) + return None + + distances: list[list[float]] = [] + for pair in self.get_element_combos(2): + distances.extend( self._ase_analysis.get_values( - self._ase_analysis.get_bonds(*bonds, unique=True) + self._ase_analysis.get_bonds(*pair, unique=True) ) ) - return bond_lengths + return distances * ureg.angstrom def normalize(self, archive, logger) -> None: super().normalize(archive, logger) - # Set the name of the section self.name = self.m_def.name if self.name is None else self.name + self._ase_atoms = self.to_ase_atoms(logger) + if ( + self.geometry_analysis_cutoffs is None + or len(self.geometry_analysis_cutoffs) > 0 + ): # ! double-check + self.geometry_analysis_cutoffs = ase_nl.natural_cutoffs( + self._ase_atoms, mult=3.0 + ) + self.setup_ase_analysis() + self.interatomic_distances = self.get_distances(logger) + class Symmetry(ArchiveSection): """ @@ -561,7 +600,7 @@ class Symmetry(ArchiveSection): def resolve_analyzed_atomic_cell( self, symmetry_analyzer: SymmetryAnalyzer, cell_type: str, logger: BoundLogger - ) -> Optional[AtomicCell]: + ) -> Optional[AtomicCell]: # ! consider using an index list and a filter function """ Resolves the `AtomicCell` section from the `SymmetryAnalyzer` object and the cell_type (primitive or conventional). @@ -610,7 +649,7 @@ def resolve_bulk_symmetry( """ Resolves the symmetry of the material being simulated using MatID and the originally parsed data under original_atomic_cell. It generates two other - `AtomicCell` sections (the primitive and standardized cells), as well as populating + `AtomicCell` sections (the primitive and standardized cells) and populates the `Symmetry` section. Args: @@ -950,24 +989,18 @@ class ModelSystem(System): """, ) + # ? Move to `AtomicCell`? atom_indices = Quantity( type=np.int32, shape=['*'], description=""" Indices of the atoms in the child with respect to its parent. Example: - - We have SrTiO3, where `AtomicCell.labels = ['Sr', 'Ti', 'O', 'O', 'O']`. If - we create a `model_system` child for the `'Ti'` atom only, then in that child - `ModelSystem.model_system.atom_indices = [1]`. If now we want to refer both to - the `'Ti'` and the last `'O'` atoms, `ModelSystem.model_system.atom_indices = [1, 4]`. - """, - ) - # TODO improve description and add an example using the case in atom_indices - bond_list = Quantity( - type=np.int32, - description=""" - List of pairs of atom indices corresponding to bonds (e.g., as defined by a force field) - within this atoms_group. + We have SrTiO3, where `AtomicCell.labels = ['Sr', 'Ti', 'O', 'O', 'O']`. + - If we create a `model_system` child for the `'Ti'` atom only, then in that child + `ModelSystem.model_system.atom_indices = [1]`. + - If now we want to refer both to the `'Ti'` and the last `'O'` atoms, + `ModelSystem.model_system.atom_indices = [1, 4]`. """, ) diff --git a/tests/test_system.py b/tests/test_system.py index 85f65fa2..5cddd090 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -20,7 +20,6 @@ import numpy as np from . import logger -from .conftest import get_scf_electronic_band_gap_template from nomad.units import ureg from nomad_simulations.model_system import AtomicCell, AtomsState @@ -37,6 +36,7 @@ def test_geometry_analysis(): atoms_state=[AtomsState(chemical_symbol='H')] * 2, ) atomic_cell.normalize(None, logger) - atomic_cell.setup_ase_analyze(atomic_cell.to_ase_atoms(logger)) - assert atomic_cell.get_bonds() == [[0.223606797749979]] #! add approx function + assert atomic_cell.interatomic_distances.to('angstrom').magnitude == [ + [0.223606797749979] + ] #! add approx function From 8e3e859cac2f84bfc193ed0aecbb6052460daa19 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Tue, 23 Apr 2024 17:08:51 +0200 Subject: [PATCH 06/30] - Add angles - Improve combo generation: unique combos only now - Robustify `AtomicCell.get_distances()` - Add quantities storing elemental combinations - TODO: fix error for matrix storage of `str` (coming from elementatl combinations) --- src/nomad_simulations/model_system.py | 96 ++++++++++++++++++++------- tests/test_system.py | 8 ++- 2 files changed, 77 insertions(+), 27 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 8fda8523..98e73f8a 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -340,13 +340,39 @@ class AtomicCell(Cell): """, ) + elemental_pairs = Quantity( + type=str, + shape=['*', 2], + description=""" + Alphabetically sorted pairs of elements in the atomic cell. + """, + ) # ? also store indices of the atoms? + interatomic_distances = Quantity( type=np.float64, - shape=['*', '*'], + shape=['*'], unit='meter', description=""" - Interatomic distances between atoms pairs within in `geometry_analysis_cutoffs`. - Periodic boundary conditions are taken into account. + Interatomic distances between pairs of (different) atoms within in `geometry_analysis_cutoffs`. + Periodic boundary conditions are taken into account. Follows the order of `element_pairs`. + """, + ) + + elemental_triples = Quantity( + type=str, + shape=['*', 3], + description=""" + Alphabetically sorted triplets of elements in the atomic cell. + """, + ) # ? also store indices of the atoms? + + interatomic_angles = Quantity( + type=np.float64, + shape=['*'], + unit='radian', + description=""" + Interatomic angles between triplets within of (different) atoms in `geometry_analysis_cutoffs`. + Periodic boundary conditions are taken into account. Follows the order of `elemental_triples`. """, ) @@ -359,21 +385,6 @@ def elements(self): set([atom_state.chemical_symbol for atom_state in self.atoms_state]) ) - @property - def element_pairs(self): - """ - Returns a list of the unique element pairs in the `AtomicCell` based on the `atoms_state`. - """ - return list( - set( - [ - (a.chemical_symbol, b.chemical_symbol) - for a in self.atoms_state - for b in self.atoms_state - ] - ) - ) - def get_element_combos( self, n_elements: int ) -> list[tuple[str]]: # ! add logger and check for negative n @@ -386,7 +397,15 @@ def recursion(depth: int, combos: list[list[str]]): return combos return recursion( depth - 1, - list(set((*combo, elem) for combo in combos for elem in self.elements)), + list( + set( + tuple( + sorted([*combo, elem]) + ) # sort to avoid duplicates # ! sort by atomic number + for combo in combos + for elem in self.elements + ) + ), ) return recursion(n_elements - 1, self.elements) @@ -453,9 +472,9 @@ def setup_ase_analysis(self) -> None: def get_distances( self, logger: BoundLogger - ) -> list[tuple[int, int]]: # ! write this out to a separate subsection + ) -> tuple[list[tuple[str, str]], list[float]]: """ - Returns a list of tuples with the indices of the atoms that are bonded in the `AtomicCell`. + Compute the interatomic distances between all pairs of atoms within `geometry_analysis_cutoffs`. """ if not hasattr(self, '_ase_analysis'): # ! make this more elegant logger.error( @@ -464,13 +483,39 @@ def get_distances( return None distances: list[list[float]] = [] - for pair in self.get_element_combos(2): + pairs = self.get_element_combos(2) + for pair in pairs: + connections = self._ase_analysis.get_bonds(*pair, unique=True) + if len(connections[0]) == 0: + logger.info( + f'Could not find the pair {pair} in the `AtomicCell` section.' + ) + continue distances.extend( + self._ase_analysis.get_values(connections) + ) # ! disable uniquness when applying histograms + return pairs, distances * ureg.angstrom + + def get_angles( + self, logger: BoundLogger + ) -> tuple[list[tuple[str, str, str]], list[float]]: + if not hasattr(self, '_ase_analysis'): + logger.error( + 'ASE analysis not set up. Run `setup_ase_analysis` before calling this method.' + ) + return None + + angles: list[list[float]] = [] + triplets = self.get_element_combos(3) + for triplet in triplets: + angles.extend( self._ase_analysis.get_values( - self._ase_analysis.get_bonds(*pair, unique=True) + self._ase_analysis.get_angles( + *triplet, unique=True + ) # ! disable uniqueness when applying histograms ) ) - return distances * ureg.angstrom + return triplets, angles * ureg.radian def normalize(self, archive, logger) -> None: super().normalize(archive, logger) @@ -486,7 +531,8 @@ def normalize(self, archive, logger) -> None: self._ase_atoms, mult=3.0 ) self.setup_ase_analysis() - self.interatomic_distances = self.get_distances(logger) + self.elemental_pairs, self.interatomic_distances = self.get_distances(logger) + self.elemental_triples, self.interatomic_angles = self.get_angles(logger) class Symmetry(ArchiveSection): diff --git a/tests/test_system.py b/tests/test_system.py index 5cddd090..8ac692e5 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -32,8 +32,12 @@ def test_geometry_analysis(): length_vector_c=1.0, name='H2', type='original', - positions=np.array([[0, 0.2, 0], [0, 0, 0.1]]) * ureg('angstrom'), - atoms_state=[AtomsState(chemical_symbol='H')] * 2, + positions=np.array([[0, 0.2, 0], [0, 0, 0.1], [0.2, 0, 0]]) * ureg('angstrom'), + atoms_state=[ + AtomsState(chemical_symbol='H'), + AtomsState(chemical_symbol='O'), + AtomsState(chemical_symbol='H'), + ], ) atomic_cell.normalize(None, logger) From c05e46e5db1fc5191814c61cf4e5212be33de93a Mon Sep 17 00:00:00 2001 From: Nathan Daelman Date: Wed, 24 Apr 2024 23:22:44 +0200 Subject: [PATCH 07/30] - Fix shape issue with `elemental`s - Apply approximations in testing - Add angles to the tests - Add first prototyp of `decorator` checks --- src/nomad_simulations/model_system.py | 37 ++++++++++++++++++++++----- tests/test_system.py | 9 ++++--- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 98e73f8a..828a404f 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -342,7 +342,7 @@ class AtomicCell(Cell): elemental_pairs = Quantity( type=str, - shape=['*', 2], + shape=['*'], # ! once the updated `Quantity` is updated, change to ['*', 2] description=""" Alphabetically sorted pairs of elements in the atomic cell. """, @@ -360,7 +360,7 @@ class AtomicCell(Cell): elemental_triples = Quantity( type=str, - shape=['*', 3], + shape=['*'], # ! once the updated `Quantity` is updated, change to ['*', 3] description=""" Alphabetically sorted triplets of elements in the atomic cell. """, @@ -371,7 +371,7 @@ class AtomicCell(Cell): shape=['*'], unit='radian', description=""" - Interatomic angles between triplets within of (different) atoms in `geometry_analysis_cutoffs`. + Interatomic angles between triplets of (different) atoms in `geometry_analysis_cutoffs`. Periodic boundary conditions are taken into account. Follows the order of `elemental_triples`. """, ) @@ -385,6 +385,23 @@ def elements(self): set([atom_state.chemical_symbol for atom_state in self.atoms_state]) ) + # ! polish and test + def check_attributes(self, attributes: list[str]) -> callable: + """ + Check if the specified attributes are not None. + """ + + def decorator(func: callable) -> callable: + def wrapper(self, *args, **kwargs) -> Optional[callable]: + for attr in attributes: + if getattr(self, attr) is None: + return None + return func(self, *args, **kwargs) + + return wrapper + + return decorator + def get_element_combos( self, n_elements: int ) -> list[tuple[str]]: # ! add logger and check for negative n @@ -410,6 +427,12 @@ def recursion(depth: int, combos: list[list[str]]): return recursion(n_elements - 1, self.elements) + def convert_element_combos(self, n_elements: int) -> list[str]: + """ + Converts the element combinations into a list of hyphen-separated strings. + """ + return ['-'.join(elem) for elem in self.get_element_combos(n_elements)] + def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): super().__init__(m_def, m_context, **kwargs) # Set the name of the section @@ -425,7 +448,7 @@ def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: Returns: (Optional[ase.Atoms]): The ASE Atoms object with the basic information from the `AtomicCell`. - """ + """ # ! split into smaller functions # Initialize ase.Atoms object with labels atoms_labels = [atom_state.chemical_symbol for atom_state in self.atoms_state] ase_atoms = ase.Atoms(symbols=atoms_labels) @@ -483,7 +506,7 @@ def get_distances( return None distances: list[list[float]] = [] - pairs = self.get_element_combos(2) + pairs = self.convert_element_combos(2) for pair in pairs: connections = self._ase_analysis.get_bonds(*pair, unique=True) if len(connections[0]) == 0: @@ -493,7 +516,7 @@ def get_distances( continue distances.extend( self._ase_analysis.get_values(connections) - ) # ! disable uniquness when applying histograms + ) # ! disable uniqueness when applying histograms return pairs, distances * ureg.angstrom def get_angles( @@ -506,7 +529,7 @@ def get_angles( return None angles: list[list[float]] = [] - triplets = self.get_element_combos(3) + triplets = self.convert_element_combos(3) for triplet in triplets: angles.extend( self._ase_analysis.get_values( diff --git a/tests/test_system.py b/tests/test_system.py index 8ac692e5..f345cb12 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -32,7 +32,7 @@ def test_geometry_analysis(): length_vector_c=1.0, name='H2', type='original', - positions=np.array([[0, 0.2, 0], [0, 0, 0.1], [0.2, 0, 0]]) * ureg('angstrom'), + positions=np.array([[0, 0.2, 0], [0, 0, 0.1], [0.2, 0, 0]]), atoms_state=[ AtomsState(chemical_symbol='H'), AtomsState(chemical_symbol='O'), @@ -41,6 +41,7 @@ def test_geometry_analysis(): ) atomic_cell.normalize(None, logger) - assert atomic_cell.interatomic_distances.to('angstrom').magnitude == [ - [0.223606797749979] - ] #! add approx function + assert np.isclose( + atomic_cell.interatomic_distances.magnitude, [[0.223606797749979]] + ).all() + assert np.isclose(atomic_cell.angles.magnitude, [[90.0]]).all() From 12246f1edd1e852abb5155a6cb1271486aabb769 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 26 Apr 2024 03:23:33 +0200 Subject: [PATCH 08/30] - Restructure functionalities into a chain of factory classes - TODO: extend testing - TODO: check `Cell` structure --- src/nomad_simulations/model_system.py | 483 +++++++++++++++----------- 1 file changed, 277 insertions(+), 206 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 828a404f..aa5a7898 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -176,6 +176,23 @@ class GeometricSpace(Entity): """, ) + def check_attributes(self, attributes: list[str]) -> Optional[callable]: + """ + Check if the specified attributes are not None. + """ + + def decorator(func: callable) -> callable: + def wrapper(self, *args, **kwargs) -> Optional[callable]: + for attr in attributes: + if self.m_contents[attr] is None: # ! verify + self.logger.error(f'Missing attribute: {attr}.') + return None + return func(self, *args, **kwargs) + + return wrapper + + return decorator + def get_geometric_space_for_atomic_cell(self, logger: BoundLogger) -> None: """ Get the real space parameters for the atomic cell using ASE. @@ -193,7 +210,7 @@ def get_geometric_space_for_atomic_cell(self, logger: BoundLogger) -> None: ) self.volume = cell.volume * ureg.angstrom**3 - def normalize(self, archive, logger) -> None: + def normalize(self, archive, logger: BoundLogger) -> None: # Skip normalization for `Entity` try: self.get_geometric_space_for_atomic_cell(logger) @@ -240,7 +257,7 @@ class Cell(GeometricSpace): description=""" List of index pairs corresponding to (covalent) bonds, e.g., as defined by a force field. """, - ) + ) # ? @JosephRudzinski: move position? # geometry and analysis positions = Quantity( @@ -250,7 +267,8 @@ class Cell(GeometricSpace): description=""" Positions of all the atoms in absolute, Cartesian coordinates. """, - ) # ? @JosePizarro: This is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? + ) # ? @JosePizarro: The description is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? + # ! @JosePizarro: the `positions` here has a different meaning than in `AtomicCell` velocities = Quantity( type=np.float64, @@ -260,7 +278,7 @@ class Cell(GeometricSpace): Velocities of the atoms defined with respect to absolute, Cartesian coordinate frame centered at the respective atom position. """, - ) # ? @JosePizarro: This is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? + ) # ? @JosePizarro: The description already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? # box lattice_vectors = Quantity( @@ -297,265 +315,318 @@ def normalize(self, archive, logger) -> None: super().normalize(archive, logger) -class AtomicCell(Cell): +def convert_combo_to_name(combo: tuple[str]) -> str: + if 1 < len(combo) < 4: + if len(combo) == 3: + return '-'.join(combo[1], combo[0], combo[2]) # flip order + raise ValueError('Only pairs and triplets are supported.') + + +class GeometryDistribution(ArchiveSection): """ - A base section used to specify the atomic cell information of a system. + A base section used to specify the geometry distributions of the system. + These include frequency counts of inter-atomic distances and angles. """ - atoms_state = SubSection(sub_section=AtomsState.m_def, repeats=True) - - n_atoms = Quantity( - type=np.int32, + name = Quantity( + type=str, description=""" - Number of atoms in the atomic cell. + Denotes the elements involved. """, ) - equivalent_atoms = Quantity( - type=np.int32, - shape=['n_atoms'], + type = Quantity( + type=MEnum('distances', 'angles'), description=""" - List of equivalent atoms as defined in `atoms`. If no equivalent atoms are found, - then the list is simply the index of each element, e.g.: - - [0, 1, 2, 3] all four atoms are non-equivalent. - - [0, 0, 0, 3] three equivalent atoms and one non-equivalent. + Type of the geometry distribution described. """, ) - # ! improve description and clarify whether this belongs to `Symmetry` with @lauri-codes - wyckoff_letters = Quantity( + elements = Quantity( type=str, - shape=['n_atoms'], - description=""" - Wyckoff letters associated with each atom. - """, - ) - - geometry_analysis_cutoffs = Quantity( - type=np.float64, shape=['*'], description=""" - Cutoff radius within which atomic geometries are analyzed. - Defaults to 3x the covalent radius denoted by the [`ase` package](https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html). + Elements involved in the geometry distribution: + - 2 for distance distributions. + - 3 for angle distributions, with the center element first. """, ) - elemental_pairs = Quantity( - type=str, - shape=['*'], # ! once the updated `Quantity` is updated, change to ['*', 2] + n_bins = Quantity( + type=np.int32, description=""" - Alphabetically sorted pairs of elements in the atomic cell. + Number of bins used to create the histogram. """, - ) # ? also store indices of the atoms? + ) - interatomic_distances = Quantity( + bins = Quantity( type=np.float64, - shape=['*'], - unit='meter', + shape=['n_bins'], description=""" - Interatomic distances between pairs of (different) atoms within in `geometry_analysis_cutoffs`. - Periodic boundary conditions are taken into account. Follows the order of `element_pairs`. + Binning used to create the histogram. """, ) - elemental_triples = Quantity( - type=str, - shape=['*'], # ! once the updated `Quantity` is updated, change to ['*', 3] - description=""" - Alphabetically sorted triplets of elements in the atomic cell. - """, - ) # ? also store indices of the atoms? - - interatomic_angles = Quantity( + frequency = Quantity( type=np.float64, - shape=['*'], - unit='radian', + shape=['n_bins'], description=""" - Interatomic angles between triplets of (different) atoms in `geometry_analysis_cutoffs`. - Periodic boundary conditions are taken into account. Follows the order of `elemental_triples`. + Frequency count of the histogram. """, ) - @property - def elements(self): - """ - Returns a list of the unique elements in the `AtomicCell` based on the `atoms_state`. - """ - return list( - set([atom_state.chemical_symbol for atom_state in self.atoms_state]) - ) + def normalize(self, archive, logger: BoundLogger): + super().normalize(archive, logger) + self.name = convert_combo_to_name(self.combo) + return self - # ! polish and test - def check_attributes(self, attributes: list[str]) -> callable: - """ - Check if the specified attributes are not None. - """ - def decorator(func: callable) -> callable: - def wrapper(self, *args, **kwargs) -> Optional[callable]: - for attr in attributes: - if getattr(self, attr) is None: - return None - return func(self, *args, **kwargs) +class DistributionHistogram: + """ + A conversion class to compute the histogram of a distribution. + It can also store the histogram in a NOMAD `GeometryDistribution` section via `produce_nomad_distribution`. + """ - return wrapper + def __init__( + self, + combo: tuple[str], + type: str, + distribution_values: np.ndarray = np.array([]), + bins: np.ndarray = np.array([]), + ) -> None: + self.name, self.combo = combo, convert_combo_to_name(combo) + self.type = type + + if len(bins) != len(distribution_values): + raise ValueError('Bins and counts must have the same length.') + counts, self.bins = np.histogram(distribution_values.magnitude, bins=bins) + self.bins *= distribution_values.u + self.frequency = counts / np.min(counts) # normalize so the minimum is 1 + + def produce_nomad_distribution(self) -> GeometryDistribution: + geom_dist = GeometryDistribution( + combo=self.combo, + n_bins=len(self.bins), + frequency=self.frequency, + ) - return decorator + # set the bin units in `GeometryDistribution` + geom_dist.m_contents['bins'].unit = self.bins.to_preferred( + [ureg.m, ureg.radian] + ).units + geom_dist.bins = self.bins + return geom_dist - def get_element_combos( - self, n_elements: int - ) -> list[tuple[str]]: # ! add logger and check for negative n - """ - Returns all unique n-pair combinations of elements. - """ - def recursion(depth: int, combos: list[list[str]]): - if depth < 1: - return combos - return recursion( - depth - 1, - list( - set( - tuple( - sorted([*combo, elem]) - ) # sort to avoid duplicates # ! sort by atomic number - for combo in combos - for elem in self.elements +class Distribution: + """ + A helper class to compute the distribution of a given geometry type, i.e., distances or angles. + It can also generate a histogram class via `produce_histogram`. + """ + + def __init__( + self, + combo: tuple[str], + type: str, + ase_atoms: ase.Atoms, + neighbor_list: ase_nl.NeighborList, + ) -> None: + self.name, self.combo = combo, convert_combo_to_name(combo) + self.type = type + + if self.type == 'distances': + self.values = ( + ase_as.Analysis(ase_atoms, neighbor_list).get_values( + ase_as.get_bonds(*combo, unique=True) + ) + * ureg.angstrom + ) + elif self.type == 'angles': + self.values = ( + abs( + ase_as.Analysis(ase_atoms, neighbor_list).get_values( + ase_as.get_angles(*combo, unique=True) ) - ), + ) + * ureg.radian ) + else: + raise ValueError('Type not recognized.') - return recursion(n_elements - 1, self.elements) + def produce_histogram(self, bins: np.ndarray) -> DistributionHistogram: + return DistributionHistogram(self.combo, self.type, self.values.magnitude, bins) - def convert_element_combos(self, n_elements: int) -> list[str]: - """ - Converts the element combinations into a list of hyphen-separated strings. - """ - return ['-'.join(elem) for elem in self.get_element_combos(n_elements)] - def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): - super().__init__(m_def, m_context, **kwargs) - # Set the name of the section - self.name = self.m_def.name +class DistributionFactory: + """ + Factory class for exhaustively generating distributions of geometrical observables + for all element paris or triples. It will produce a list of `Distribution` objects via `produce_distributions`. + """ + + def __init__( + self, ase_atoms: ase.Atoms, neighbor_list: ase_nl.NeighborList + ) -> None: + self.ase_atoms = ase_atoms + self.neighbor_list = neighbor_list + self._elements = sorted(list(set(self.ase_atoms.get_chemical_symbols()))) - def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: + @property + def get_elemental_pairs(self) -> list[tuple[str, str]]: + """Generate all possible pairs of element symbols. + Permutations are not considered, i.e., (B, A) is converted into (A, B).""" + return [ + (atom_1, atom_2) + for i, atom_1 in enumerate(self.get_elements) + for atom_2 in self.get_elements[i:] + ] + + @property + def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: + """Generate all possible triples of element symbols with the center element first. + Permutations between the outer elements are not considered, i.e., (A, C, B) is converted into (A, B, C). """ - Generates an ASE Atoms object with the most basic information from the parsed `AtomicCell` - section (labels, periodic_boundary_conditions, positions, and lattice_vectors). + return [ + (atom_c, atom_1, atom_2) + for atom_c in self.get_elements + for i, atom_1 in enumerate(self.get_elements) + for atom_2 in self.get_elements[i:] + ] + + def produce_distributions(self) -> list[Distribution]: + """Generate all possible distributions of distances and angles + for all element pairs and triples, respectively.""" + return [ + Distribution(combos, type_decl, self.ase_atoms, self.neighbor_list) + for type_decl, combos in zip( + ['distances', 'angles'], + [*self.get_elemental_pairs, *self.get_elemental_triples_centered], + ) + ] - Args: - logger (BoundLogger): The logger to log messages. - Returns: - (Optional[ase.Atoms]): The ASE Atoms object with the basic information from the `AtomicCell`. - """ # ! split into smaller functions - # Initialize ase.Atoms object with labels - atoms_labels = [atom_state.chemical_symbol for atom_state in self.atoms_state] - ase_atoms = ase.Atoms(symbols=atoms_labels) - - # PBC - ase_atoms.set_pbc(self.periodic_boundary_conditions) - - # Positions (ensure they are parsed) - if self.positions is not None: - if len(self.positions) != len(self.atoms_state): - logger.error( - 'Length of `AtomicCell.positions` does not coincide with the length of the `AtomicCell.atoms_state`.' - ) - return None - ase_atoms.set_positions(self.positions.to('angstrom').magnitude) - else: - logger.error('Could not find `AtomicCell.positions`.') - return None +class AtomicCell(Cell): + """ + A base section used to specify the atomic cell information of a system. + """ - # Lattice vectors - if self.lattice_vectors is not None: - ase_atoms.set_cell(self.lattice_vectors.to('angstrom').magnitude) - else: - logger.info('Could not find `AtomicCell.lattice_vectors`.') + atoms_states = SubSection( + sub_section=AtomsState.m_def, + repeats=True, + ) - return ase_atoms + geometry_distributions = SubSection( + sub_section=GeometryDistribution.m_def, + repeats=True, + ) - def setup_ase_analysis(self) -> None: - """ - Analyzes the `AtomicCell` section using ASE Atoms object. - Return a bonds and angles list. + n_atoms = Quantity( + type=np.int32, + description=""" + Number of atoms in the atomic cell. + """, + ) - Args: - ase_atoms (ase.Atoms): The ASE Atoms object to analyze. - logger (BoundLogger): The logger to log messages. - """ + positions = Cell.positions.m_copy() + positions.shape = ['n_atoms', 3] # check the appropriate shape - neighbor_list = ase_nl.NeighborList( - self.geometry_analysis_cutoffs, - self_interaction=False, - ) - neighbor_list.update(self._ase_atoms) - self._ase_analysis = ase_as.Analysis(self._ase_atoms, nl=neighbor_list) + velocities = Cell.velocities.m_copy() + velocities.shape = ['n_atoms', 3] # check the appropriate shape - def get_distances( - self, logger: BoundLogger - ) -> tuple[list[tuple[str, str]], list[float]]: + equivalent_atoms = Quantity( + type=np.int32, + shape=['n_atoms'], + description=""" + List of equivalent atoms as defined in `atoms`. If no equivalent atoms are found, + then the list is simply the index of each element, e.g.: + - [0, 1, 2, 3] all four atoms are non-equivalent. + - [0, 0, 0, 3] three equivalent atoms and one non-equivalent. + """, + ) + + # ! improve description and clarify whether this belongs to `Symmetry` with @lauri-codes + wyckoff_letters = Quantity( + type=str, + shape=['n_atoms'], + description=""" + Wyckoff letters associated with each atom. + """, + ) + + geometry_analysis_cutoffs = Quantity( + type=np.float64, + shape=['*'], + description=""" + Cutoff radius within which atomic geometries are analyzed. + Defaults to 3x the covalent radius denoted by the [`ase` package](https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html). + """, + ) + + @check_attributes( + self, + attributes=[ + 'atoms_state', + 'lattice_vectors', + 'positions', + 'periodic_boundary_conditions', + ], + ) + def set_ase_atoms(self): """ - Compute the interatomic distances between all pairs of atoms within `geometry_analysis_cutoffs`. + Generate an `ase.Atoms` object from `AtomicCell`. """ - if not hasattr(self, '_ase_analysis'): # ! make this more elegant - logger.error( - 'ASE analysis not set up. Run `setup_ase_analysis` before calling this method.' - ) - return None + self.ase_atoms = ase.Atoms( + symbols=[atom_state.chemical_symbol for atom_state in self.atoms_state], + pbc=self.periodic_boundary_conditions, + positions=self.positions.to('angstrom').magnitude, + cell=self.lattice_vectors.to('angstrom').magnitude, + ) + return self - distances: list[list[float]] = [] - pairs = self.convert_element_combos(2) - for pair in pairs: - connections = self._ase_analysis.get_bonds(*pair, unique=True) - if len(connections[0]) == 0: - logger.info( - f'Could not find the pair {pair} in the `AtomicCell` section.' - ) - continue - distances.extend( - self._ase_analysis.get_values(connections) - ) # ! disable uniqueness when applying histograms - return pairs, distances * ureg.angstrom - - def get_angles( - self, logger: BoundLogger - ) -> tuple[list[tuple[str, str, str]], list[float]]: - if not hasattr(self, '_ase_analysis'): - logger.error( - 'ASE analysis not set up. Run `setup_ase_analysis` before calling this method.' - ) - return None + def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): + super().__init__(m_def, m_context, **kwargs) + self.name = self.m_def.name + self.analyze_geometry = kwargs.get('analyze_geometry', False) - angles: list[list[float]] = [] - triplets = self.convert_element_combos(3) - for triplet in triplets: - angles.extend( - self._ase_analysis.get_values( - self._ase_analysis.get_angles( - *triplet, unique=True - ) # ! disable uniqueness when applying histograms + def normalize(self, archive, logger: BoundLogger): + """When `analyze_geometry` is `true`, perform the geometry analysis. + All intermediate artifacts are stored in the `AtomicCell` section. + Only `GeometryDistribution` sections are retained in the archive written to disk.""" + super().normalize(archive, logger) + if self.analyze_geometry: + # set up neighbor list + if ( + self.geometry_analysis_cutoffs is None # ? needed + or len(self.geometry_analysis_cutoffs) == 0 + ): + self.geometry_analysis_cutoffs = ase_nl.natural_cutoffs( + self.ase_atoms, mult=3.0 ) + self.neighbor_list = ase_nl.build_neighbor_list( + self.geometry_analysis_cutoffs, + self_interaction=False, ) - return triplets, angles * ureg.radian - - def normalize(self, archive, logger) -> None: - super().normalize(archive, logger) - - self.name = self.m_def.name if self.name is None else self.name - self._ase_atoms = self.to_ase_atoms(logger) - if ( - self.geometry_analysis_cutoffs is None - or len(self.geometry_analysis_cutoffs) > 0 - ): # ! double-check - self.geometry_analysis_cutoffs = ase_nl.natural_cutoffs( - self._ase_atoms, mult=3.0 - ) - self.setup_ase_analysis() - self.elemental_pairs, self.interatomic_distances = self.get_distances(logger) - self.elemental_triples, self.interatomic_angles = self.get_angles(logger) + # set up distributions + self.simple_distributions = DistributionFactory( + self.ase_atoms, + self.neighbor_list, + ).produce_distributions() + + self.histogram_distributions, self.distributions = [], [] + for distribution in self.simple_distributions: + if distribution.type == 'distances': + bins = np.arange( + np.arange(0, max(self.geometry_analysis_cutoffs), 0.001) + ) + elif distribution.type == 'angles': + bins = np.arange(0, np.pi, 0.01) + self.histogram_distributions.append( + distribution.produce_histogram(bins) + ) + self.distributions.append( + self.histogram_distributions[-1].produce_nomad_distribution() + ) + return self class Symmetry(ArchiveSection): @@ -731,7 +802,7 @@ def resolve_bulk_symmetry( """ symmetry = {} try: - ase_atoms = original_atomic_cell.to_ase_atoms(logger) + ase_atoms = original_atomic_cell.set_ase_atoms(logger) symmetry_analyzer = SymmetryAnalyzer( ase_atoms, symmetry_tol=config.normalize.symmetry_tolerance ) From e63c9877f3e08e17ff5662268a9f1cc7930b6f45 Mon Sep 17 00:00:00 2001 From: Nathan Daelman Date: Fri, 26 Apr 2024 11:58:56 +0200 Subject: [PATCH 09/30] Fix some mistakes --- src/nomad_simulations/model_system.py | 78 +++++++++++++++------------ 1 file changed, 43 insertions(+), 35 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index aa5a7898..8e9dd330 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -49,6 +49,26 @@ from .utils import get_sibling_section, is_not_representative +def check_attributes(attributes: list[str]) -> Optional[callable]: + """ + Check if the specified attributes are not None. + """ + + def decorator(func: callable) -> callable: + def wrapper(self, *args, **kwargs) -> Optional[callable]: + for attr in attributes: + if attr not in self.all_quantities: # ! verify + self.logger.error( + f'Missing attribute: {attr}.' + ) # require the class to provide a logger + return None + return func(self, *args, **kwargs) + + return wrapper + + return decorator + + class GeometricSpace(Entity): """ A base section used to define geometrical spaces and their entities. @@ -176,23 +196,6 @@ class GeometricSpace(Entity): """, ) - def check_attributes(self, attributes: list[str]) -> Optional[callable]: - """ - Check if the specified attributes are not None. - """ - - def decorator(func: callable) -> callable: - def wrapper(self, *args, **kwargs) -> Optional[callable]: - for attr in attributes: - if self.m_contents[attr] is None: # ! verify - self.logger.error(f'Missing attribute: {attr}.') - return None - return func(self, *args, **kwargs) - - return wrapper - - return decorator - def get_geometric_space_for_atomic_cell(self, logger: BoundLogger) -> None: """ Get the real space parameters for the atomic cell using ASE. @@ -316,9 +319,12 @@ def normalize(self, archive, logger) -> None: def convert_combo_to_name(combo: tuple[str]) -> str: + """Convert a combo tuple to a hypen-separated string. + For angles, the center element is now indeed placed at the center.""" if 1 < len(combo) < 4: if len(combo) == 3: - return '-'.join(combo[1], combo[0], combo[2]) # flip order + combo = (combo[1], combo[0], combo[2]) # flip order + return '-'.join(combo) raise ValueError('Only pairs and triplets are supported.') @@ -411,7 +417,7 @@ def produce_nomad_distribution(self) -> GeometryDistribution: ) # set the bin units in `GeometryDistribution` - geom_dist.m_contents['bins'].unit = self.bins.to_preferred( + geom_dist.all_quantities['bins'].unit = self.bins.to_preferred( [ureg.m, ureg.radian] ).units geom_dist.bins = self.bins @@ -434,18 +440,19 @@ def __init__( self.name, self.combo = combo, convert_combo_to_name(combo) self.type = type + geom_analysis = ase_as.Analysis( + ase_atoms, neighbor_list + ) # slightly less optimal if self.type == 'distances': self.values = ( - ase_as.Analysis(ase_atoms, neighbor_list).get_values( - ase_as.get_bonds(*combo, unique=True) - ) + geom_analysis.get_values(geom_analysis.get_bonds(*combo, unique=True)) * ureg.angstrom ) elif self.type == 'angles': self.values = ( - abs( - ase_as.Analysis(ase_atoms, neighbor_list).get_values( - ase_as.get_angles(*combo, unique=True) + np.abs( + geom_analysis.get_values( + geom_analysis.get_angles(*combo, unique=True) ) ) * ureg.radian @@ -454,7 +461,7 @@ def __init__( raise ValueError('Type not recognized.') def produce_histogram(self, bins: np.ndarray) -> DistributionHistogram: - return DistributionHistogram(self.combo, self.type, self.values.magnitude, bins) + return DistributionHistogram(self.combo, self.type, self.values, bins) class DistributionFactory: @@ -476,8 +483,8 @@ def get_elemental_pairs(self) -> list[tuple[str, str]]: Permutations are not considered, i.e., (B, A) is converted into (A, B).""" return [ (atom_1, atom_2) - for i, atom_1 in enumerate(self.get_elements) - for atom_2 in self.get_elements[i:] + for i, atom_1 in enumerate(self._elements) + for atom_2 in self._elements[i:] ] @property @@ -487,9 +494,9 @@ def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: """ return [ (atom_c, atom_1, atom_2) - for atom_c in self.get_elements - for i, atom_1 in enumerate(self.get_elements) - for atom_2 in self.get_elements[i:] + for atom_c in self._elements + for i, atom_1 in enumerate(self._elements) + for atom_2 in self._elements[i:] ] def produce_distributions(self) -> list[Distribution]: @@ -562,7 +569,6 @@ class AtomicCell(Cell): ) @check_attributes( - self, attributes=[ 'atoms_state', 'lattice_vectors', @@ -586,6 +592,7 @@ def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): super().__init__(m_def, m_context, **kwargs) self.name = self.m_def.name self.analyze_geometry = kwargs.get('analyze_geometry', False) + self.ase_atoms = None def normalize(self, archive, logger: BoundLogger): """When `analyze_geometry` is `true`, perform the geometry analysis. @@ -804,7 +811,8 @@ def resolve_bulk_symmetry( try: ase_atoms = original_atomic_cell.set_ase_atoms(logger) symmetry_analyzer = SymmetryAnalyzer( - ase_atoms, symmetry_tol=config.normalize.symmetry_tolerance + ase_atoms, + symmetry_tol=config.normalize.symmetry_tolerance, # ! @JosePizarro: it cannot find this import ) except ValueError as e: logger.debug( @@ -1164,12 +1172,12 @@ def resolve_system_type_and_dimensionality( system_type, dimensionality = self.type, self.dimensionality if ( len(ase_atoms) - <= config.normalize.system_classification_with_clusters_threshold + <= config.normalize.system_classification_with_clusters_threshold # ! @JosePizarro: it cannot find this import ): try: classifier = Classifier( radii='covalent', - cluster_threshold=config.normalize.cluster_threshold, + cluster_threshold=config.normalize.cluster_threshold, # ! @JosePizarro: it cannot find this import ) classification = classifier.classify(ase_atoms) except Exception as e: From 479725a1b3581cb66ed6c809d8bc882cb3496f88 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Sat, 27 Apr 2024 00:28:32 +0200 Subject: [PATCH 10/30] - Work out distance and angle values tests for ethane - Fix minor bugs in src --- src/nomad_simulations/model_system.py | 28 +++++---- tests/test_system.py | 91 +++++++++++++++++++++------ 2 files changed, 86 insertions(+), 33 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 8e9dd330..3686ec7b 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -444,19 +444,20 @@ def __init__( ase_atoms, neighbor_list ) # slightly less optimal if self.type == 'distances': - self.values = ( - geom_analysis.get_values(geom_analysis.get_bonds(*combo, unique=True)) - * ureg.angstrom - ) + if len((matches := geom_analysis.get_bonds(*combo, unique=True))[0]): + self.values = geom_analysis.get_values(matches) + else: + self.values = np.array([]) + self.values *= ureg.angstrom elif self.type == 'angles': - self.values = ( - np.abs( - geom_analysis.get_values( - geom_analysis.get_angles(*combo, unique=True) - ) - ) - * ureg.radian - ) + if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]): + self.values = np.array(geom_analysis.get_values(matches)) + self.values = np.abs( + np.where(self.values > 180, 360 - self.values, self.values) + ) # restrict to [0, 180] + else: + self.values = np.array([]) + self.values *= ureg.degree else: raise ValueError('Type not recognized.') @@ -609,6 +610,7 @@ def normalize(self, archive, logger: BoundLogger): self.ase_atoms, mult=3.0 ) self.neighbor_list = ase_nl.build_neighbor_list( + self.ase_atoms, self.geometry_analysis_cutoffs, self_interaction=False, ) @@ -626,7 +628,7 @@ def normalize(self, archive, logger: BoundLogger): np.arange(0, max(self.geometry_analysis_cutoffs), 0.001) ) elif distribution.type == 'angles': - bins = np.arange(0, np.pi, 0.01) + bins = np.arange(0, 180, 0.01) self.histogram_distributions.append( distribution.produce_histogram(bins) ) diff --git a/tests/test_system.py b/tests/test_system.py index f345cb12..18505012 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -16,32 +16,83 @@ # limitations under the License. # +import ase +from ase import neighborlist as ase_nl import pytest import numpy as np from . import logger from nomad.units import ureg -from nomad_simulations.model_system import AtomicCell, AtomsState - - -def test_geometry_analysis(): - atomic_cell = AtomicCell( - length_vector_a=1.0, - length_vector_b=1.0, - length_vector_c=1.0, - name='H2', - type='original', - positions=np.array([[0, 0.2, 0], [0, 0, 0.1], [0.2, 0, 0]]), - atoms_state=[ - AtomsState(chemical_symbol='H'), - AtomsState(chemical_symbol='O'), - AtomsState(chemical_symbol='H'), +from nomad_simulations.model_system import ( + AtomicCell, + AtomsState, + Distribution, + DistributionHistogram, + DistributionFactory, +) + + +def setup_geometry_analysis(): # ? move to conftest.py + """Produce a highly symmetric ethane molecule in the staggered conformation.""" + return ase.Atoms( + symbols=(['C'] * 2 + ['H'] * 6), + positions=[ + [0, 0, 0.765], + [0, 0, -0.765], + [0, 1.01692, 1.1574], + [-0.88068, -0.050846, 1.1574], + [0.88068, -0.050846, 1.1574], + [0, -1.01692, -1.1574], + [-0.88068, 0.050846, -1.1574], + [0.88068, 0.050846, -1.1574], + ], # in angstrom + cell=[ + [3, 0, 0], + [0, 3, 0], + [0, 0, 3], + ], # in angstrom + ) + + +@pytest.mark.parametrize( + 'atomic_cell, analysis_type, elements, references', + [ + [setup_geometry_analysis(), 'distances', ['C', 'C'], [1.53]], + [setup_geometry_analysis(), 'distances', ['C', 'H'], [0.97] * 4 + [1.09] * 2], + [ + setup_geometry_analysis(), + 'angles', + ['C', 'C', 'H'], + [111.1] * 2 + [113.98] * 4, ], + [setup_geometry_analysis(), 'angles', ['C', 'H', 'H'], [120]], + ], +) # references should be in ascending order +def test_distribution( + atomic_cell: AtomicCell, + analysis_type: str, + elements: tuple[str], + references: list[float], +): + """ + Check the actual interatomic distances against the expected values. + """ + neighbor_list = ase_nl.build_neighbor_list( + atomic_cell, + ase_nl.natural_cutoffs(atomic_cell, mult=1.0), # ? test element X + self_interaction=False, + ) + values = np.sort( + Distribution( + elements, analysis_type, atomic_cell, neighbor_list + ).values.magnitude ) - atomic_cell.normalize(None, logger) - assert np.isclose( - atomic_cell.interatomic_distances.magnitude, [[0.223606797749979]] - ).all() - assert np.isclose(atomic_cell.angles.magnitude, [[90.0]]).all() + if analysis_type == 'distances': + # only positive reference values allowed + assert (0 <= values).all() # ? should this be restricted to positive definite + elif analysis_type == 'angles': + # only angles between 0 and pi allowed + assert (0 <= values).all() and (values <= 180).all() + assert np.allclose(values, references, atol=0.01) # check the actual values From d0506a3d9a8c0aea16c545bb043df0d329463d43 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Sun, 28 Apr 2024 20:27:03 +0200 Subject: [PATCH 11/30] Add test for `DistributionFactory` --- tests/test_system.py | 100 ++++++++++++++++++++++++++++++------------- 1 file changed, 70 insertions(+), 30 deletions(-) diff --git a/tests/test_system.py b/tests/test_system.py index 18505012..904c5493 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -33,59 +33,99 @@ ) -def setup_geometry_analysis(): # ? move to conftest.py - """Produce a highly symmetric ethane molecule in the staggered conformation.""" - return ase.Atoms( - symbols=(['C'] * 2 + ['H'] * 6), - positions=[ - [0, 0, 0.765], - [0, 0, -0.765], - [0, 1.01692, 1.1574], - [-0.88068, -0.050846, 1.1574], - [0.88068, -0.050846, 1.1574], - [0, -1.01692, -1.1574], - [-0.88068, 0.050846, -1.1574], - [0.88068, 0.050846, -1.1574], - ], # in angstrom - cell=[ - [3, 0, 0], - [0, 3, 0], - [0, 0, 3], - ], # in angstrom +# Produce a highly symmetric ethane molecule in the staggered conformation. +ethane = ase.Atoms( + symbols=(['C'] * 2 + ['H'] * 6), + positions=[ + [0, 0, 0.765], + [0, 0, -0.765], + [0, 1.01692, 1.1574], + [-0.88068, -0.050846, 1.1574], + [0.88068, -0.050846, 1.1574], + [0, -1.01692, -1.1574], + [-0.88068, 0.050846, -1.1574], + [0.88068, 0.050846, -1.1574], + ], # in angstrom + cell=[ + [3, 0, 0], + [0, 3, 0], + [0, 0, 3], + ], # in angstrom +) + + +def setup_neighbor_list(atomic_cell: ase.Atoms): + """Build a neighbor list for the ethane molecule.""" + return ase_nl.build_neighbor_list( + atomic_cell, + ase_nl.natural_cutoffs(atomic_cell, mult=1.0), + self_interaction=False, ) +@pytest.mark.parametrize( + 'atomic_cell, pair_references, triple_references', + [ + [ + ethane, + [ + ('C', 'C'), + ('C', 'H'), + ('H', 'H'), + ], + [ + ('C', 'C', 'C'), + ('C', 'C', 'H'), + ('C', 'H', 'H'), + ('H', 'C', 'C'), + ('H', 'C', 'H'), + ('H', 'H', 'H'), + ], + ] + ], +) +def test_distribution_factory( + atomic_cell: ase.Atoms, + pair_references: list[tuple[str, str]], + triple_references: list[tuple[str, str, str]], +): + """ + Check the correct generation of elemental paris and triples. + Important factors include: + - no duplicates: commutation laws apply, i.e. the whole pair and the last 2 elements in a triple + - alphabetical ordering of the elements + """ + df = DistributionFactory(atomic_cell, setup_neighbor_list(atomic_cell)) + assert df.get_elemental_pairs == pair_references + assert df.get_elemental_triples_centered == triple_references + + @pytest.mark.parametrize( 'atomic_cell, analysis_type, elements, references', [ - [setup_geometry_analysis(), 'distances', ['C', 'C'], [1.53]], - [setup_geometry_analysis(), 'distances', ['C', 'H'], [0.97] * 4 + [1.09] * 2], + [ethane, 'distances', ['C', 'C'], [1.53]], + [ethane, 'distances', ['C', 'H'], [0.97] * 4 + [1.09] * 2], [ - setup_geometry_analysis(), + ethane, 'angles', ['C', 'C', 'H'], [111.1] * 2 + [113.98] * 4, ], - [setup_geometry_analysis(), 'angles', ['C', 'H', 'H'], [120]], + [ethane, 'angles', ['C', 'H', 'H'], [120]], ], ) # references should be in ascending order def test_distribution( atomic_cell: AtomicCell, analysis_type: str, - elements: tuple[str], + elements: list[str], references: list[float], ): """ Check the actual interatomic distances against the expected values. """ - neighbor_list = ase_nl.build_neighbor_list( - atomic_cell, - ase_nl.natural_cutoffs(atomic_cell, mult=1.0), # ? test element X - self_interaction=False, - ) values = np.sort( Distribution( - elements, analysis_type, atomic_cell, neighbor_list + elements, analysis_type, atomic_cell, setup_neighbor_list(atomic_cell) ).values.magnitude ) From 81b3ece09b1225d0905fa922041fe67d29b64d9b Mon Sep 17 00:00:00 2001 From: ndaelman Date: Sun, 28 Apr 2024 22:22:23 +0200 Subject: [PATCH 12/30] - Add tests for `DistributionHistogram` - Fix bugs in src --- src/nomad_simulations/model_system.py | 16 +++++---- tests/test_system.py | 48 +++++++++++++++++++++++++-- 2 files changed, 54 insertions(+), 10 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 3686ec7b..e5975592 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -339,7 +339,7 @@ class GeometryDistribution(ArchiveSection): description=""" Denotes the elements involved. """, - ) + ) # ! add combo with a list of elements, but what about triples / quadruples centers? type = Quantity( type=MEnum('distances', 'angles'), @@ -396,22 +396,24 @@ class DistributionHistogram: def __init__( self, combo: tuple[str], - type: str, + type: str, # ? replace for automated check? distribution_values: np.ndarray = np.array([]), bins: np.ndarray = np.array([]), ) -> None: - self.name, self.combo = combo, convert_combo_to_name(combo) + self.combo = combo self.type = type - if len(bins) != len(distribution_values): - raise ValueError('Bins and counts must have the same length.') counts, self.bins = np.histogram(distribution_values.magnitude, bins=bins) self.bins *= distribution_values.u - self.frequency = counts / np.min(counts) # normalize so the minimum is 1 + # normalize so the minimum is 1 + if len(nonzero_counts := counts[np.where(counts > 0)]): + self.frequency = counts / np.min(nonzero_counts) + else: + self.frequency = counts def produce_nomad_distribution(self) -> GeometryDistribution: geom_dist = GeometryDistribution( - combo=self.combo, + name=convert_combo_to_name(self.combo), n_bins=len(self.bins), frequency=self.frequency, ) diff --git a/tests/test_system.py b/tests/test_system.py index 904c5493..e6e9a1b5 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -91,11 +91,12 @@ def test_distribution_factory( ): """ Check the correct generation of elemental paris and triples. - Important factors include: + Important factors (enforced by the references) include: - no duplicates: commutation laws apply, i.e. the whole pair and the last 2 elements in a triple - alphabetical ordering of the elements """ df = DistributionFactory(atomic_cell, setup_neighbor_list(atomic_cell)) + assert df.get_elemental_pairs == pair_references assert df.get_elemental_triples_centered == triple_references @@ -122,6 +123,9 @@ def test_distribution( ): """ Check the actual interatomic distances against the expected values. + Extra checks are performed for: + - distances: positive values only + - angles: values between 0 and 180 """ values = np.sort( Distribution( @@ -130,9 +134,47 @@ def test_distribution( ) if analysis_type == 'distances': - # only positive reference values allowed assert (0 <= values).all() # ? should this be restricted to positive definite elif analysis_type == 'angles': - # only angles between 0 and pi allowed assert (0 <= values).all() and (values <= 180).all() assert np.allclose(values, references, atol=0.01) # check the actual values + + +@pytest.mark.parametrize( + 'atomic_cell, elements, bins, count_reference_map', + [ + [ethane, ['C', 'C'], np.array([]), {}], # empty bins + [ethane, ['C', 'C'], np.arange(0, 2, 0.01), {1.53: 1}], + [ethane, ['C', 'H'], np.arange(0, 2, 0.01), {0.96: 2, 1.09: 1}], + [ethane, ['C', 'C', 'H'], np.arange(0, 180, 1), {111: 1, 113: 2}], + # [ethane, ['C', 'H', 'H'], np.arange(0, 180, 1), {120: 1}], + ], # note that the exact bin is hard to pin down: may deviate by 1 index +) +def test_distribution_histogram( + atomic_cell: AtomicCell, + elements: list[str], + bins: np.ndarray, + count_reference_map: dict[float, int], +): + """ + Check the conversion of a distribution into a histogram, such as: + Tests focusing on the binning include: + 1. test empty bins generating empty histogram + 2. test bin units + Lastly, the histogram frequency count is checked, with attention for: + 3. normalization of the count, so the proper lower limit is 1 + 4. the actual count + """ + to_type = 'distances' if len(elements) == 2 else 'angles' + dist = Distribution( + elements, to_type, atomic_cell, setup_neighbor_list(atomic_cell) + ) + dh = DistributionHistogram(elements, to_type, dist.values, bins) + + if len(bins) == 0: # 1. + assert len(dh.frequency) == 0 + return + assert dh.bins.u == ureg.angstrom if to_type == 'distances' else ureg.degrees # 2. + assert np.min(dh.frequency[dh.frequency > 0]) == 1 # 3. + for bin, count in count_reference_map.items(): # 4. + assert dh.frequency[np.where(dh.bins.magnitude == bin)] == count From 897ccb6c13aef86993be39b24d8f07bf9367cb6b Mon Sep 17 00:00:00 2001 From: ndaelman Date: Sun, 28 Apr 2024 22:56:44 +0200 Subject: [PATCH 13/30] Improve `analyze_geometry` check --- src/nomad_simulations/model_system.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index e5975592..d0e075d6 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -604,10 +604,7 @@ def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) if self.analyze_geometry: # set up neighbor list - if ( - self.geometry_analysis_cutoffs is None # ? needed - or len(self.geometry_analysis_cutoffs) == 0 - ): + if not self.geometry_analysis_cutoffs: self.geometry_analysis_cutoffs = ase_nl.natural_cutoffs( self.ase_atoms, mult=3.0 ) From 5440b4d4e522663a095757978c075270bc664c27 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Mon, 13 May 2024 10:29:26 +0200 Subject: [PATCH 14/30] Apply reviewer feedback --- src/nomad_simulations/model_system.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index d0e075d6..9b7c3f1c 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -319,7 +319,7 @@ def normalize(self, archive, logger) -> None: def convert_combo_to_name(combo: tuple[str]) -> str: - """Convert a combo tuple to a hypen-separated string. + """Convert a combo tuple to a hyphen-separated string. For angles, the center element is now indeed placed at the center.""" if 1 < len(combo) < 4: if len(combo) == 3: @@ -337,7 +337,9 @@ class GeometryDistribution(ArchiveSection): name = Quantity( type=str, description=""" - Denotes the elements involved. + Elements of which the geometry distribution is described. + The name is a hyphen-separated and sorted according to atomic number. + Central elements are placed in the middle. """, ) # ! add combo with a list of elements, but what about triples / quadruples centers? @@ -564,6 +566,7 @@ class AtomicCell(Cell): geometry_analysis_cutoffs = Quantity( type=np.float64, + unit='meter', shape=['*'], description=""" Cutoff radius within which atomic geometries are analyzed. @@ -605,12 +608,12 @@ def normalize(self, archive, logger: BoundLogger): if self.analyze_geometry: # set up neighbor list if not self.geometry_analysis_cutoffs: - self.geometry_analysis_cutoffs = ase_nl.natural_cutoffs( - self.ase_atoms, mult=3.0 + self.geometry_analysis_cutoffs = ( + ase_nl.natural_cutoffs(self.ase_atoms, mult=3.0) * ureg.angstrom ) self.neighbor_list = ase_nl.build_neighbor_list( self.ase_atoms, - self.geometry_analysis_cutoffs, + self.geometry_analysis_cutoffs.to('angstrom').magnitude, self_interaction=False, ) From fd1bff9a2cd175b314e61b6b6f37f6fb24842458 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Mon, 13 May 2024 10:33:45 +0200 Subject: [PATCH 15/30] Rename `GeometryDistribution.elements` to `GeometryDistribution.atom_labels` as per reviewer request --- src/nomad_simulations/model_system.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 9b7c3f1c..681aba31 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -350,7 +350,7 @@ class GeometryDistribution(ArchiveSection): """, ) - elements = Quantity( + atom_labels = Quantity( type=str, shape=['*'], description=""" From 1539c150a37e76cf78ebdd895ab477fd45561835 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Mon, 13 May 2024 12:30:58 +0200 Subject: [PATCH 16/30] - Split out `GeometryDistribution` in to subcases (resolving reviewer feedback) - Add dihedrals to computation --- src/nomad_simulations/model_system.py | 150 +++++++++++++++++++------- tests/test_system.py | 3 + 2 files changed, 115 insertions(+), 38 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 681aba31..51661406 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -318,16 +318,6 @@ def normalize(self, archive, logger) -> None: super().normalize(archive, logger) -def convert_combo_to_name(combo: tuple[str]) -> str: - """Convert a combo tuple to a hyphen-separated string. - For angles, the center element is now indeed placed at the center.""" - if 1 < len(combo) < 4: - if len(combo) == 3: - combo = (combo[1], combo[0], combo[2]) # flip order - return '-'.join(combo) - raise ValueError('Only pairs and triplets are supported.') - - class GeometryDistribution(ArchiveSection): """ A base section used to specify the geometry distributions of the system. @@ -338,25 +328,31 @@ class GeometryDistribution(ArchiveSection): type=str, description=""" Elements of which the geometry distribution is described. - The name is a hyphen-separated and sorted according to atomic number. - Central elements are placed in the middle. + The name is a hyphen-separated and sorted alphabetically. """, - ) # ! add combo with a list of elements, but what about triples / quadruples centers? + ) type = Quantity( - type=MEnum('distances', 'angles'), + type=str, description=""" Type of the geometry distribution described. """, ) - atom_labels = Quantity( + extremity_atom_labels = Quantity( type=str, - shape=['*'], + shape=[2], description=""" - Elements involved in the geometry distribution: - - 2 for distance distributions. - - 3 for angle distributions, with the center element first. + Elements for which the geometry distribution is defined. + (Note: the order of the elements is not algorithmically enforced.) + """, + ) # ? should we enforce this algorithmically. this also has implications for the distribution + + bins = Quantity( + type=np.float64, + shape=['n_bins'], + description=""" + Binning used to create the histogram. """, ) @@ -367,25 +363,85 @@ class GeometryDistribution(ArchiveSection): """, ) - bins = Quantity( + frequency = Quantity( type=np.float64, shape=['n_bins'], description=""" - Binning used to create the histogram. + Frequency count of the histogram. """, ) - frequency = Quantity( - type=np.float64, - shape=['n_bins'], + def normalize(self, archive, logger: BoundLogger): + super().normalize(archive, logger) + return self + + +class DistanceGeometryDistribution(GeometryDistribution): + bins = GeometryDistribution.bins.m_copy() + bins.unit = 'meter' + + def normalize(self, archive, logger: BoundLogger): + super().normalize(archive, logger) + self.name = '-'.join(self.extremity_atom_labels) + self.type = 'distances' + return self + + +class AngleGeometryDistribution(GeometryDistribution): + name = GeometryDistribution.name.m_copy() + name.description += """ + The center element is placed at the center of the name. + """ + + central_atom_labels = Quantity( + type=str, + shape=[1], description=""" - Frequency count of the histogram. + Element at the center of the angle. """, ) + bins = GeometryDistribution.bins.m_copy() + bins.unit = 'radian' + + def normalize(self, archive, logger: BoundLogger): + super().normalize(archive, logger) + self.name = '-'.join( + [self.extremity_atom_labels[0]] + + self.central_atom_labels + + [self.extremity_atom_labels[1]] + ) + self.type = 'angles' + return self + + +class DihedralGeometryDistribution(GeometryDistribution): + name = GeometryDistribution.name.m_copy() + name.description += """ + The central axis is placed in the middle, with the same ordering. + """ + + central_atom_labels = Quantity( + type=str, + shape=[2], + description=""" + Elements along which the axis of the dihedral angle is aligned. + The direction vector runs from element 1 to 2. + (Note: the order of the elements is not algorithmically enforced.) + """, + ) # ? should we enforce this algorithmically. this also has implications for the distribution + + bins = GeometryDistribution.bins.m_copy() + bins.unit = 'radian' + def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) - self.name = convert_combo_to_name(self.combo) + self.name = '-'.join( + [self.extremity_atom_labels[0]] + + self.central_atom_labels + + [self.extremity_atom_labels[1]] + ) + self.type = 'dihedrals' return self @@ -414,17 +470,22 @@ def __init__( self.frequency = counts def produce_nomad_distribution(self) -> GeometryDistribution: - geom_dist = GeometryDistribution( - name=convert_combo_to_name(self.combo), + if self.type == 'distances': + constructor = DistanceGeometryDistribution + elif self.type == 'angles': + constructor = AngleGeometryDistribution + elif self.type == 'dihedrals': + constructor = DihedralGeometryDistribution + else: + raise ValueError('Type not recognized.') + + geom_dist = constructor( + extremity_atom_labels=[self.combo[0], self.combo[-1]], n_bins=len(self.bins), frequency=self.frequency, ) - - # set the bin units in `GeometryDistribution` - geom_dist.all_quantities['bins'].unit = self.bins.to_preferred( - [ureg.m, ureg.radian] - ).units - geom_dist.bins = self.bins + if len(self.combo) > 2: + geom_dist.central_atom_labels = self.combo[1:-1] return geom_dist @@ -441,7 +502,7 @@ def __init__( ase_atoms: ase.Atoms, neighbor_list: ase_nl.NeighborList, ) -> None: - self.name, self.combo = combo, convert_combo_to_name(combo) + self.combo = combo self.type = type geom_analysis = ase_as.Analysis( @@ -453,7 +514,7 @@ def __init__( else: self.values = np.array([]) self.values *= ureg.angstrom - elif self.type == 'angles': + elif self.type == 'angles' or self.type == 'dihedrals': if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]): self.values = np.array(geom_analysis.get_values(matches)) self.values = np.abs( @@ -498,11 +559,24 @@ def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: Permutations between the outer elements are not considered, i.e., (A, C, B) is converted into (A, B, C). """ return [ - (atom_c, atom_1, atom_2) + (atom_1, atom_c, atom_2) for atom_c in self._elements for i, atom_1 in enumerate(self._elements) for atom_2 in self._elements[i:] - ] + ] # matching the order of the `ase` package: https://wiki.fysik.dtu.dk/ase/_modules/ase/geometry/analysis.html#Analysis.get_angles + + @property + def get_elemental_quadruples_centered(self) -> list[tuple[str, str, str, str]]: + """Generate all possible quadruples of element symbols with the center elements first. + Permutations between the outer elements are not considered, i.e., (A, D, B, C) is converted into (A, B, C, D). + """ + return [ + (atom_1, atom_c, atom_d, atom_2) + for atom_c in self._elements + for atom_d in self._elements + for i, atom_1 in enumerate(self._elements) + for atom_2 in self._elements[i:] + ] # matching the order of the `ase` package: https://wiki.fysik.dtu.dk/ase/_modules/ase/geometry/analysis.html#Analysis.get_dihedrals def produce_distributions(self) -> list[Distribution]: """Generate all possible distributions of distances and angles diff --git a/tests/test_system.py b/tests/test_system.py index e6e9a1b5..4c1a39bb 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -178,3 +178,6 @@ def test_distribution_histogram( assert np.min(dh.frequency[dh.frequency > 0]) == 1 # 3. for bin, count in count_reference_map.items(): # 4. assert dh.frequency[np.where(dh.bins.magnitude == bin)] == count + + +# ! add normalization test for `GeometryDistribution`, i.e. `dh.produce_nomad_distribution().normalize(None, None)` From 333f15c8cf91f25cd7166871e25e470a9095580a Mon Sep 17 00:00:00 2001 From: ndaelman Date: Mon, 13 May 2024 12:55:01 +0200 Subject: [PATCH 17/30] Correct typing (mypy) --- src/nomad_simulations/model_system.py | 40 +++++++++++++-------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 51661406..b460255a 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -22,8 +22,9 @@ import ase from ase import neighborlist as ase_nl from ase.geometry import analysis as ase_as -from typing import Tuple, Optional +from typing import Optional, Callable from structlog.stdlib import BoundLogger +import pint from matid import SymmetryAnalyzer, Classifier # pylint: disable=import-error from matid.classification.classifications import ( @@ -49,13 +50,13 @@ from .utils import get_sibling_section, is_not_representative -def check_attributes(attributes: list[str]) -> Optional[callable]: +def check_attributes(attributes: list[str]) -> Optional[Callable]: """ Check if the specified attributes are not None. """ - def decorator(func: callable) -> callable: - def wrapper(self, *args, **kwargs) -> Optional[callable]: + def decorator(func: Callable) -> Callable: + def wrapper(self, *args, **kwargs) -> Optional[Callable]: for attr in attributes: if attr not in self.all_quantities: # ! verify self.logger.error( @@ -453,10 +454,10 @@ class DistributionHistogram: def __init__( self, - combo: tuple[str], + combo: list[str], type: str, # ? replace for automated check? - distribution_values: np.ndarray = np.array([]), - bins: np.ndarray = np.array([]), + distribution_values: pint.Quantity, + bins: np.ndarray, ) -> None: self.combo = combo self.type = type @@ -483,6 +484,7 @@ def produce_nomad_distribution(self) -> GeometryDistribution: extremity_atom_labels=[self.combo[0], self.combo[-1]], n_bins=len(self.bins), frequency=self.frequency, + bins=self.bins, ) if len(self.combo) > 2: geom_dist.central_atom_labels = self.combo[1:-1] @@ -497,7 +499,7 @@ class Distribution: def __init__( self, - combo: tuple[str], + combo: list[str], type: str, ase_atoms: ase.Atoms, neighbor_list: ase_nl.NeighborList, @@ -544,34 +546,34 @@ def __init__( self._elements = sorted(list(set(self.ase_atoms.get_chemical_symbols()))) @property - def get_elemental_pairs(self) -> list[tuple[str, str]]: + def get_elemental_pairs(self) -> list[list[str]]: """Generate all possible pairs of element symbols. Permutations are not considered, i.e., (B, A) is converted into (A, B).""" return [ - (atom_1, atom_2) + [atom_1, atom_2] for i, atom_1 in enumerate(self._elements) for atom_2 in self._elements[i:] ] @property - def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: + def get_elemental_triples_centered(self) -> list[list[str]]: """Generate all possible triples of element symbols with the center element first. Permutations between the outer elements are not considered, i.e., (A, C, B) is converted into (A, B, C). """ return [ - (atom_1, atom_c, atom_2) + [atom_1, atom_c, atom_2] for atom_c in self._elements for i, atom_1 in enumerate(self._elements) for atom_2 in self._elements[i:] ] # matching the order of the `ase` package: https://wiki.fysik.dtu.dk/ase/_modules/ase/geometry/analysis.html#Analysis.get_angles @property - def get_elemental_quadruples_centered(self) -> list[tuple[str, str, str, str]]: + def get_elemental_quadruples_centered(self) -> list[list[str]]: """Generate all possible quadruples of element symbols with the center elements first. Permutations between the outer elements are not considered, i.e., (A, D, B, C) is converted into (A, B, C, D). """ return [ - (atom_1, atom_c, atom_d, atom_2) + [atom_1, atom_c, atom_d, atom_2] for atom_c in self._elements for atom_d in self._elements for i, atom_1 in enumerate(self._elements) @@ -700,10 +702,8 @@ def normalize(self, archive, logger: BoundLogger): self.histogram_distributions, self.distributions = [], [] for distribution in self.simple_distributions: if distribution.type == 'distances': - bins = np.arange( - np.arange(0, max(self.geometry_analysis_cutoffs), 0.001) - ) - elif distribution.type == 'angles': + bins = np.arange(0, max(self.geometry_analysis_cutoffs), 0.001) + elif distribution.type == 'angles' or distribution.type == 'dihedrals': bins = np.arange(0, 180, 0.01) self.histogram_distributions.append( distribution.produce_histogram(bins) @@ -870,7 +870,7 @@ def resolve_analyzed_atomic_cell( def resolve_bulk_symmetry( self, original_atomic_cell: AtomicCell, logger: BoundLogger - ) -> Tuple[Optional[AtomicCell], Optional[AtomicCell]]: + ) -> tuple[Optional[AtomicCell], Optional[AtomicCell]]: """ Resolves the symmetry of the material being simulated using MatID and the originally parsed data under original_atomic_cell. It generates two other @@ -1234,7 +1234,7 @@ class ModelSystem(System): def resolve_system_type_and_dimensionality( self, ase_atoms: ase.Atoms, logger: BoundLogger - ) -> Tuple[str, int]: + ) -> tuple[str, int]: """ Resolves the `ModelSystem.type` and `ModelSystem.dimensionality` using `MatID` classification analyzer: From e8f59f8db7dfeb546e7d1e8a908ca7f12259d065 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 16:01:54 +0200 Subject: [PATCH 18/30] Fix indentation error `AtomicCell.to_ase_atoms()` --- src/nomad_simulations/model_system.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index d3dacc0e..ef1f7b43 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -650,8 +650,7 @@ class AtomicCell(Cell): """, ) - -def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: + def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: """ Generates an ASE Atoms object with the most basic information from the parsed `AtomicCell` section (labels, periodic_boundary_conditions, positions, and lattice_vectors). @@ -694,7 +693,6 @@ def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: return ase_atoms - def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): super().__init__(m_def, m_context, **kwargs) self.name = self.m_def.name @@ -933,13 +931,13 @@ def resolve_bulk_symmetry( symmetry['hall_symbol'] = symmetry_analyzer.get_hall_symbol() symmetry['point_group_symbol'] = symmetry_analyzer.get_point_group() symmetry['space_group_number'] = symmetry_analyzer.get_space_group_number() - symmetry['space_group_symbol'] = ( - symmetry_analyzer.get_space_group_international_short() - ) + symmetry[ + 'space_group_symbol' + ] = symmetry_analyzer.get_space_group_international_short() symmetry['origin_shift'] = symmetry_analyzer._get_spglib_origin_shift() - symmetry['transformation_matrix'] = ( - symmetry_analyzer._get_spglib_transformation_matrix() - ) + symmetry[ + 'transformation_matrix' + ] = symmetry_analyzer._get_spglib_transformation_matrix() # Populating the originally parsed AtomicCell wyckoff_letters and equivalent_atoms information original_wyckoff = symmetry_analyzer.get_wyckoff_letters_original() From 208c2ba85e6dd25d206da6108247ab10acd4eb92 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 16:17:28 +0200 Subject: [PATCH 19/30] Move `bond_list` from `Cell` ro `AtomicCell` upon reviewer request --- src/nomad_simulations/model_system.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index ef1f7b43..0b7ff078 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -255,14 +255,6 @@ class Cell(GeometricSpace): """, ) - # topology - bond_list = Quantity( - type=np.int32, - description=""" - List of index pairs corresponding to (covalent) bonds, e.g., as defined by a force field. - """, - ) # ? @JosephRudzinski: move position? - # geometry and analysis positions = Quantity( type=np.float64, @@ -602,6 +594,7 @@ class AtomicCell(Cell): repeats=True, ) + # geometry geometry_distributions = SubSection( sub_section=GeometryDistribution.m_def, repeats=True, @@ -620,6 +613,7 @@ class AtomicCell(Cell): velocities = Cell.velocities.m_copy() velocities.shape = ['n_atoms', 3] # check the appropriate shape + # symmetry equivalent_atoms = Quantity( type=np.int32, shape=['n_atoms'], @@ -650,6 +644,14 @@ class AtomicCell(Cell): """, ) + # topology + bond_list = Quantity( + type=np.int32, + description=""" + List of index pairs corresponding to (covalent) bonds, e.g., as defined by a force field. + """, + ) + def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: """ Generates an ASE Atoms object with the most basic information from the parsed `AtomicCell` From a398857970b07c8b8ae17d62a93672563ab2bd75 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 16:25:07 +0200 Subject: [PATCH 20/30] Reapply `@check_attributes` to `to_ase_atoms` --- src/nomad_simulations/model_system.py | 55 ++++++++------------------- 1 file changed, 16 insertions(+), 39 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 0b7ff078..4eec19f5 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -652,47 +652,24 @@ class AtomicCell(Cell): """, ) - def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: + @check_attributes( + attributes=[ + 'atoms_state', + 'lattice_vectors', + 'positions', + 'periodic_boundary_conditions', + ], + ) + def to_ase_atoms(self): """ - Generates an ASE Atoms object with the most basic information from the parsed `AtomicCell` - section (labels, periodic_boundary_conditions, positions, and lattice_vectors). - - Args: - logger (BoundLogger): The logger to log messages. - - Returns: - (Optional[ase.Atoms]): The ASE Atoms object with the basic information from the `AtomicCell`. + Generate an `ase.Atoms` object from `AtomicCell`. """ - # Initialize ase.Atoms object with labels - atoms_labels = [atom_state.chemical_symbol for atom_state in self.atoms_state] - ase_atoms = ase.Atoms(symbols=atoms_labels) - - # PBC - if self.periodic_boundary_conditions is None: - logger.info( - 'Could not find `AtomicCell.periodic_boundary_conditions`. They will be set to [False, False, False].' - ) - self.periodic_boundary_conditions = [False, False, False] - ase_atoms.set_pbc(self.periodic_boundary_conditions) - - # Lattice vectors - if self.lattice_vectors is not None: - ase_atoms.set_cell(self.lattice_vectors.to('angstrom').magnitude) - else: - logger.info('Could not find `AtomicCell.lattice_vectors`.') - - # Positions - if self.positions is not None: - if len(self.positions) != len(self.atoms_state): - logger.error( - 'Length of `AtomicCell.positions` does not coincide with the length of the `AtomicCell.atoms_state`.' - ) - return None - ase_atoms.set_positions(self.positions.to('angstrom').magnitude) - else: - logger.warning('Could not find `AtomicCell.positions`.') - return None - + ase_atoms = ase.Atoms( + symbols=[atom_state.chemical_symbol for atom_state in self.atoms_state], + pbc=self.periodic_boundary_conditions, + positions=self.positions.to('angstrom').magnitude, + cell=self.lattice_vectors.to('angstrom').magnitude, + ) return ase_atoms def __init__(self, m_def: Section = None, m_context: Context = None, **kwargs): From 7265b53204d011f1fb34cca44dd40355e9f9abf9 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 17:24:42 +0200 Subject: [PATCH 21/30] - Remove `positions` and `velocities` from `AtomicCell`, in favor of those in `Cell`. - Rename `Cell.n_cell_points` to `Cell.n_objects` --- src/nomad_simulations/model_system.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 4eec19f5..a1acaed8 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -248,7 +248,7 @@ class Cell(GeometricSpace): """, ) - n_cell_points = Quantity( + n_objects = Quantity( type=np.int32, description=""" Number of cell points. @@ -263,8 +263,7 @@ class Cell(GeometricSpace): description=""" Positions of all the atoms in absolute, Cartesian coordinates. """, - ) # ? @JosePizarro: The description is already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? - # ! @JosePizarro: the `positions` here has a different meaning than in `AtomicCell` + ) velocities = Quantity( type=np.float64, @@ -274,7 +273,7 @@ class Cell(GeometricSpace): Velocities of the atoms defined with respect to absolute, Cartesian coordinate frame centered at the respective atom position. """, - ) # ? @JosePizarro: The description already referencing to atoms: shouldn't it part of `AtomicCell` then? Or maybe we use the term "building block"? + ) # box lattice_vectors = Quantity( @@ -600,23 +599,10 @@ class AtomicCell(Cell): repeats=True, ) - n_atoms = Quantity( - type=np.int32, - description=""" - Number of atoms in the atomic cell. - """, - ) - - positions = Cell.positions.m_copy() - positions.shape = ['n_atoms', 3] # check the appropriate shape - - velocities = Cell.velocities.m_copy() - velocities.shape = ['n_atoms', 3] # check the appropriate shape - # symmetry equivalent_atoms = Quantity( type=np.int32, - shape=['n_atoms'], + shape=['n_objects'], description=""" List of equivalent atoms as defined in `atoms`. If no equivalent atoms are found, then the list is simply the index of each element, e.g.: @@ -628,7 +614,7 @@ class AtomicCell(Cell): # ! improve description and clarify whether this belongs to `Symmetry` with @lauri-codes wyckoff_letters = Quantity( type=str, - shape=['n_atoms'], + shape=['n_objects'], description=""" Wyckoff letters associated with each atom. """, From 4333a53c4e786acf3d7860616258393032f74787 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 18:50:27 +0200 Subject: [PATCH 22/30] Add test for `GeometryDistribution` (instantiation and normalization) --- src/nomad_simulations/model_system.py | 4 +-- tests/test_system.py | 41 +++++++++++++++++++++++++-- 2 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index a1acaed8..a325f794 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -394,7 +394,7 @@ class AngleGeometryDistribution(GeometryDistribution): ) bins = GeometryDistribution.bins.m_copy() - bins.unit = 'radian' + bins.unit = 'degrees' def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) @@ -424,7 +424,7 @@ class DihedralGeometryDistribution(GeometryDistribution): ) # ? should we enforce this algorithmically. this also has implications for the distribution bins = GeometryDistribution.bins.m_copy() - bins.unit = 'radian' + bins.unit = 'degrees' def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) diff --git a/tests/test_system.py b/tests/test_system.py index 4c1a39bb..895181c1 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -26,7 +26,6 @@ from nomad.units import ureg from nomad_simulations.model_system import ( AtomicCell, - AtomsState, Distribution, DistributionHistogram, DistributionFactory, @@ -146,7 +145,7 @@ def test_distribution( [ethane, ['C', 'C'], np.array([]), {}], # empty bins [ethane, ['C', 'C'], np.arange(0, 2, 0.01), {1.53: 1}], [ethane, ['C', 'H'], np.arange(0, 2, 0.01), {0.96: 2, 1.09: 1}], - [ethane, ['C', 'C', 'H'], np.arange(0, 180, 1), {111: 1, 113: 2}], + [ethane, ['C', 'H', 'C'], np.arange(0, 180, 1), {111: 1, 113: 2}], # [ethane, ['C', 'H', 'H'], np.arange(0, 180, 1), {120: 1}], ], # note that the exact bin is hard to pin down: may deviate by 1 index ) @@ -180,4 +179,40 @@ def test_distribution_histogram( assert dh.frequency[np.where(dh.bins.magnitude == bin)] == count -# ! add normalization test for `GeometryDistribution`, i.e. `dh.produce_nomad_distribution().normalize(None, None)` +@pytest.mark.parametrize( + 'elements', + [['C', 'C'], ['C', 'H'], ['C', 'H', 'C'], ['C', 'H', 'H']], +) +def test_nomad_distribution(elements): + """ + Check the instantiation of `GeometryDistribution`. via `DistributionHistogram`. + Specifically, check sub-typing, value storage, and reporting of the structure. + """ + # define variables + gd = 'GeometryDistribution' + if len(elements) == 2: + subtype = 'Distance' + bins = np.arange(0, 2, 0.01) + units = ureg.angstrom + else: + subtype = 'Angle' + bins = np.arange(0, 180, 0.001) + units = ureg.degrees + + # instantiate objects + dh = DistributionHistogram( + elements, + subtype.lower() + 's', + np.random.rand(len(bins)) * units, + bins, + ) + nomad_dh = dh.produce_nomad_distribution().normalize(None, None) + + # test + assert nomad_dh.__class__.__name__ == subtype + gd + assert np.all(dh.frequency == nomad_dh.frequency) + assert np.allclose(dh.bins, nomad_dh.bins.to(units), atol=0.01) + + assert nomad_dh.extremity_atom_labels == [elements[0], elements[-1]] + if subtype == 'Angle': + assert nomad_dh.central_atom_labels == [elements[1]] From 2b9c28760eb9e06c018c00c788ff83948074035f Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 21:51:10 +0200 Subject: [PATCH 23/30] - Add test case to `test_distribution_histogram` - Update tests to be in-line with new elemental order (central atoms in the middle) --- tests/test_system.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/test_system.py b/tests/test_system.py index 895181c1..61ca476d 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -68,17 +68,17 @@ def setup_neighbor_list(atomic_cell: ase.Atoms): [ ethane, [ - ('C', 'C'), - ('C', 'H'), - ('H', 'H'), + ['C', 'C'], + ['C', 'H'], + ['H', 'H'], ], [ - ('C', 'C', 'C'), - ('C', 'C', 'H'), - ('C', 'H', 'H'), - ('H', 'C', 'C'), - ('H', 'C', 'H'), - ('H', 'H', 'H'), + ['C', 'C', 'C'], + ['C', 'C', 'H'], + ['H', 'C', 'H'], + ['C', 'H', 'C'], + ['C', 'H', 'H'], + ['H', 'H', 'H'], ], ] ], @@ -145,8 +145,8 @@ def test_distribution( [ethane, ['C', 'C'], np.array([]), {}], # empty bins [ethane, ['C', 'C'], np.arange(0, 2, 0.01), {1.53: 1}], [ethane, ['C', 'H'], np.arange(0, 2, 0.01), {0.96: 2, 1.09: 1}], - [ethane, ['C', 'H', 'C'], np.arange(0, 180, 1), {111: 1, 113: 2}], - # [ethane, ['C', 'H', 'H'], np.arange(0, 180, 1), {120: 1}], + [ethane, ['C', 'C', 'H'], np.arange(0, 180, 1), {111: 1, 113: 2}], + [ethane, ['H', 'C', 'H'], np.arange(0, 180, 1), {131: 1}], # why 131? ], # note that the exact bin is hard to pin down: may deviate by 1 index ) def test_distribution_histogram( @@ -181,7 +181,7 @@ def test_distribution_histogram( @pytest.mark.parametrize( 'elements', - [['C', 'C'], ['C', 'H'], ['C', 'H', 'C'], ['C', 'H', 'H']], + [['C', 'C'], ['C', 'H'], ['C', 'C', 'H'], ['C', 'H', 'H']], ) def test_nomad_distribution(elements): """ From a3ac6a39107697b7dfdcefd295b6102ca104a151 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 21:58:52 +0200 Subject: [PATCH 24/30] Change `GeometricDistribution` units for angles from degrees to radian --- src/nomad_simulations/model_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index a325f794..a1acaed8 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -394,7 +394,7 @@ class AngleGeometryDistribution(GeometryDistribution): ) bins = GeometryDistribution.bins.m_copy() - bins.unit = 'degrees' + bins.unit = 'radian' def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) @@ -424,7 +424,7 @@ class DihedralGeometryDistribution(GeometryDistribution): ) # ? should we enforce this algorithmically. this also has implications for the distribution bins = GeometryDistribution.bins.m_copy() - bins.unit = 'degrees' + bins.unit = 'radian' def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) From 90238379b5c7697c67a026bf0097a04cecc23e33 Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 22:16:12 +0200 Subject: [PATCH 25/30] - Add documentation - TODO: add testing for dihedral angles - TODO: decide on binning --- docs/model_system/model_system.md | 35 ++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/docs/model_system/model_system.md b/docs/model_system/model_system.md index 037ee64c..6724efd6 100644 --- a/docs/model_system/model_system.md +++ b/docs/model_system/model_system.md @@ -1,4 +1,37 @@ # `ModelSystem` !!! warning - This page is still under construction. \ No newline at end of file + This page is still under construction. + +## Distributions of geometric properties + +### Schema structure +These distributions are stored in `AtomicCell.geometry_distributions`, which is a list of repeating `GeometryDistribution` subsections. +Information on the bins and their setup parameters (e.g. neighbor cutoff distances), are meanwhile stored under `AtomicCell` directly. + +`GeometryDistribution` objects are effectively histograms that specialize further in sections for elemental pairs / triples / quadruples. +This is a choice to facilitate searches and visualizations over the distribution themselves, which are normalized to reproduce the same frequency for primitive cells as supercells. +Additional advantages include a limited storage consumption. +It is, however, not suitable for extracting the exacting distances / angles / dihedral angles by a given value. + +!!! note + Triples / quadruples are defined around a specific geometric primitive, i.e. a point / arrow vector. + The elements making up these primitives are stored under `central_atom_labels`. + +### Obejct initialization +To keep state within `AtomicCell` simple and exert control over the stages in calculation, we use pure Python helper classes. +Each class tackles a specific stage, in line with the _Single Responsibility Principle_: + + - `DistributionFactory` scans the combinatorial space of elements and generates their `Distributions`. + - `Distribution` leverages `ase` for computing the distances / (dihedral) angles for the elemental combo provided. + It can also instantiate a `DistributionHistogram` of itself. + - `DistributionHistogram` contains the histogram version of their distribution. + +While only `DistributionHistogram` will eventually be written out to `GeometryDistribution`, the rest of the data artifacts are retained under `AtomicCell` during run-time. +As such, once computed, they can be used in other computation, or data analysis. + +!!! info + Conversion to `pandas.DataFrame` is a planned feature. + +Inclusion of the actual computation pipeline, meanwhile, is toggled via the boolean `AtomicCell.analyze_geometry`. +The execution itself is handled by the normalizer. \ No newline at end of file From 4da90603da20af2efd1529102b5d9b08c860b47e Mon Sep 17 00:00:00 2001 From: ndaelman Date: Fri, 17 May 2024 22:30:52 +0200 Subject: [PATCH 26/30] Fix mypy typing nameclash (gosh, mypy sucks) --- src/nomad_simulations/model_system.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index a1acaed8..6426b332 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -453,8 +453,8 @@ def __init__( self.combo = combo self.type = type - counts, self.bins = np.histogram(distribution_values.magnitude, bins=bins) - self.bins *= distribution_values.u + counts, _bins = np.histogram(distribution_values.magnitude, bins=bins) + self.bins = _bins * distribution_values.u # normalize so the minimum is 1 if len(nonzero_counts := counts[np.where(counts > 0)]): self.frequency = counts / np.min(nonzero_counts) From 6a6305ae2635923b20f6171580a476ac32a34206 Mon Sep 17 00:00:00 2001 From: nathan Date: Fri, 24 May 2024 00:21:47 +0200 Subject: [PATCH 27/30] - Build in non-stored default for bins in archive storage - Migrate cutoff quantities to `GeometryDistribution` - Update neighbor_list in pipeline and normalization - Update docs - TODO: add bin units - TODO: verify neighbor_list elements - TODO: remove `check_attributes` --- docs/model_system/model_system.md | 7 +- src/nomad_simulations/model_system.py | 174 ++++++++++++++++++-------- 2 files changed, 126 insertions(+), 55 deletions(-) diff --git a/docs/model_system/model_system.md b/docs/model_system/model_system.md index 6724efd6..0b09eac9 100644 --- a/docs/model_system/model_system.md +++ b/docs/model_system/model_system.md @@ -6,8 +6,9 @@ ## Distributions of geometric properties ### Schema structure -These distributions are stored in `AtomicCell.geometry_distributions`, which is a list of repeating `GeometryDistribution` subsections. -Information on the bins and their setup parameters (e.g. neighbor cutoff distances), are meanwhile stored under `AtomicCell` directly. +These distributions are stored under `AtomicCell`, with different specializations as repeating subsections of `GeometryDistribution` or its child classes. +This separation simplifies filtering to the `results` section and helps with indexing, as bin distributions tend to follow the default. +Other information, such as cutoff distances used to construct the neighbor list, are also stored here. `GeometryDistribution` objects are effectively histograms that specialize further in sections for elemental pairs / triples / quadruples. This is a choice to facilitate searches and visualizations over the distribution themselves, which are normalized to reproduce the same frequency for primitive cells as supercells. @@ -18,7 +19,7 @@ It is, however, not suitable for extracting the exacting distances / angles / di Triples / quadruples are defined around a specific geometric primitive, i.e. a point / arrow vector. The elements making up these primitives are stored under `central_atom_labels`. -### Obejct initialization +### Object initialization To keep state within `AtomicCell` simple and exert control over the stages in calculation, we use pure Python helper classes. Each class tackles a specific stage, in line with the _Single Responsibility Principle_: diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 6426b332..42644284 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -210,11 +210,11 @@ def get_geometric_space_for_atomic_cell(self, logger: BoundLogger) -> None: cell.lengths() * ureg.angstrom ) self.angle_vectors_b_c, self.angle_vectors_a_c, self.angle_vectors_a_b = ( - cell.angles() * ureg.degree + cell.angles() * ureg.radians ) self.volume = cell.volume * ureg.angstrom**3 - def normalize(self, archive, logger: BoundLogger) -> None: + def normalize(self, archive: ArchiveSection, logger: BoundLogger) -> None: # Skip normalization for `Entity` try: self.get_geometric_space_for_atomic_cell(logger) @@ -340,6 +340,31 @@ class GeometryDistribution(ArchiveSection): """, ) # ? should we enforce this algorithmically. this also has implications for the distribution + n_cutoff_specifications = Quantity( + type=np.int32, + description=""" + Number of cutoff specifications used to analyze the geometry. + """, + ) + + element_cutoff_selection = Quantity( + type=str, + shape=['n_cutoff_specifications'], + description=""" + Elements for which cutoffs are defined. The order of the elements corresponds to those in `distance_cutoffs`. + """, + ) + + distance_cutoffs = Quantity( + type=np.float64, + unit='meter', + shape=['n_cutoff_specifications'], + description=""" + Cutoff radii within which atomic geometries are analyzed. This incidentally defines the upper-bound for distances. + Defaults to 3x the covalent radius denoted by the [`ase` package](https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html). + """, + ) + bins = Quantity( type=np.float64, shape=['n_bins'], @@ -363,6 +388,23 @@ class GeometryDistribution(ArchiveSection): """, ) + def get_bins(self) -> np.ndarray: + """Return the distribution bins in their SI units. + Fall back to a default value if none are present.""" + return self.bins + + def sort_cutoffs(self, sort_key: Callable = lambda x: ase.Atom(x).number) -> None: + """Sort the cutoffs and the elements accordingly. + Args: + sort_key (Callable): A function to sort the elements. Defaults to the atomic number. + Returns: + GeometryDistribution: The sorted geometry distribution object itself. + """ + sorted_indices = np.argsort(self.element_cutoff_selection, key=sort_key) + self.distance_cutoffs = self.distance_cutoffs[sorted_indices] + self.element_cutoff_selection = self.element_cutoff_selection[sorted_indices] + return self + def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) return self @@ -394,7 +436,14 @@ class AngleGeometryDistribution(GeometryDistribution): ) bins = GeometryDistribution.bins.m_copy() - bins.unit = 'radian' + bins.unit = 'radians' + + def get_bins(self) -> np.ndarray: + """Return the distribution bins in their SI units, i.e. radians. + Fall back to a default value, i.e. [0 ... 0.01 ... 180] degrees, if none are present.""" + if not self.bins: + return (np.arange(0, 180, 0.01) * ureg.degrees).to('radians') + return self.bins def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) @@ -408,7 +457,9 @@ def normalize(self, archive, logger: BoundLogger): class DihedralGeometryDistribution(GeometryDistribution): - name = GeometryDistribution.name.m_copy() + name = ( + GeometryDistribution.name.m_copy() + ) # ? forego storage of constant values for @property name.description += """ The central axis is placed in the middle, with the same ordering. """ @@ -424,7 +475,14 @@ class DihedralGeometryDistribution(GeometryDistribution): ) # ? should we enforce this algorithmically. this also has implications for the distribution bins = GeometryDistribution.bins.m_copy() - bins.unit = 'radian' + bins.unit = 'radians' + + def get_bins(self) -> np.ndarray: + """Return the distribution bins in their SI units, i.e. radians. + Fall back to a default value, i.e. [0 ... 0.01 ... 180] degrees, if none are present.""" + if not self.bins: + return (np.arange(0, 180, 0.01) * ureg.degrees).to('radians') + return self.bins def normalize(self, archive, logger: BoundLogger): super().normalize(archive, logger) @@ -449,34 +507,46 @@ def __init__( type: str, # ? replace for automated check? distribution_values: pint.Quantity, bins: np.ndarray, + neighbor_list: pint.Quantity, ) -> None: self.combo = combo self.type = type + self.neighbor_list = neighbor_list + # normalize so the minimum is 1 counts, _bins = np.histogram(distribution_values.magnitude, bins=bins) self.bins = _bins * distribution_values.u - # normalize so the minimum is 1 if len(nonzero_counts := counts[np.where(counts > 0)]): self.frequency = counts / np.min(nonzero_counts) else: self.frequency = counts def produce_nomad_distribution(self) -> GeometryDistribution: + # select the constructor based on the type if self.type == 'distances': constructor = DistanceGeometryDistribution elif self.type == 'angles': constructor = AngleGeometryDistribution elif self.type == 'dihedrals': constructor = DihedralGeometryDistribution + elif self.type == 'generic': + constructor = GeometryDistribution else: raise ValueError('Type not recognized.') + # instantiate the NOMAD distribution geom_dist = constructor( extremity_atom_labels=[self.combo[0], self.combo[-1]], - n_bins=len(self.bins), frequency=self.frequency, - bins=self.bins, + n_cutoff_specifications=len(self.neighbor_list), + element_cutoff_selection=self.combo, # ! check if this is correct + distance_cutoffs=self.neighbor_list, ) + if self.type == 'distances': + geom_dist.n_bins = len(self.bins) + geom_dist.bins = self.bins + + # add the central atom labels if they exist if len(self.combo) > 2: geom_dist.central_atom_labels = self.combo[1:-1] return geom_dist @@ -484,8 +554,8 @@ def produce_nomad_distribution(self) -> GeometryDistribution: class Distribution: """ - A helper class to compute the distribution of a given geometry type, i.e., distances or angles. - It can also generate a histogram class via `produce_histogram`. + A helper class to compute the distribution of a given geometry type, i.e., distances or (dihedral) angles. + Also generates a histogram class via `produce_histogram`. """ def __init__( @@ -493,13 +563,14 @@ def __init__( combo: list[str], type: str, ase_atoms: ase.Atoms, - neighbor_list: ase_nl.NeighborList, + neighbor_list: pint.Quantity, ) -> None: self.combo = combo self.type = type + self.neighbor_list = neighbor_list geom_analysis = ase_as.Analysis( - ase_atoms, neighbor_list + ase_atoms, neighbor_list.to('angstrom').magnitude ) # slightly less optimal if self.type == 'distances': if len((matches := geom_analysis.get_bonds(*combo, unique=True))[0]): @@ -511,16 +582,18 @@ def __init__( if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]): self.values = np.array(geom_analysis.get_values(matches)) self.values = np.abs( - np.where(self.values > 180, 360 - self.values, self.values) - ) # restrict to [0, 180] + np.where(self.values > np.pi, 2 * np.pi - self.values, self.values) + ) # restrict to [0, pi] else: self.values = np.array([]) - self.values *= ureg.degree + self.values *= ureg.radians else: raise ValueError('Type not recognized.') def produce_histogram(self, bins: np.ndarray) -> DistributionHistogram: - return DistributionHistogram(self.combo, self.type, self.values, bins) + return DistributionHistogram( + self.combo, self.type, self.values, bins, self.neighbor_list + ) class DistributionFactory: @@ -529,9 +602,7 @@ class DistributionFactory: for all element paris or triples. It will produce a list of `Distribution` objects via `produce_distributions`. """ - def __init__( - self, ase_atoms: ase.Atoms, neighbor_list: ase_nl.NeighborList - ) -> None: + def __init__(self, ase_atoms: ase.Atoms, neighbor_list: pint.Quantity) -> None: self.ase_atoms = ase_atoms self.neighbor_list = neighbor_list self._elements = sorted(list(set(self.ase_atoms.get_chemical_symbols()))) @@ -593,7 +664,19 @@ class AtomicCell(Cell): repeats=True, ) - # geometry + # geometry distributions + ## I separated the distributions into different categories + ## to facilitate indexing when copied over to `results` section + distance_distributions = SubSection( + sub_section=DistanceGeometryDistribution.m_def, + repeats=True, + ) + + angle_distributions = SubSection( + sub_section=AngleGeometryDistribution.m_def, + repeats=True, + ) + geometry_distributions = SubSection( sub_section=GeometryDistribution.m_def, repeats=True, @@ -620,16 +703,6 @@ class AtomicCell(Cell): """, ) - geometry_analysis_cutoffs = Quantity( - type=np.float64, - unit='meter', - shape=['*'], - description=""" - Cutoff radius within which atomic geometries are analyzed. - Defaults to 3x the covalent radius denoted by the [`ase` package](https://wiki.fysik.dtu.dk/ase/ase/neighborlist.html). - """, - ) - # topology bond_list = Quantity( type=np.int32, @@ -672,35 +745,32 @@ def normalize(self, archive, logger: BoundLogger): self.name = self.m_def.name if self.name is None else self.name if self.analyze_geometry: - # set up neighbor list - if not self.geometry_analysis_cutoffs: - self.geometry_analysis_cutoffs = ( - ase_nl.natural_cutoffs(self.ase_atoms, mult=3.0) * ureg.angstrom - ) - self.neighbor_list = ase_nl.build_neighbor_list( - self.ase_atoms, - self.geometry_analysis_cutoffs.to('angstrom').magnitude, - self_interaction=False, - ) - # set up distributions + cutoffs = ase_nl.natural_cutoffs(self.ase_atoms, mult=3.0) * ureg.angstrom self.simple_distributions = DistributionFactory( self.ase_atoms, - self.neighbor_list, + ase_nl.build_neighbor_list( + self.ase_atoms, + cutoffs.magnitude, # ase works in angstrom + self_interaction=False, + ), ).produce_distributions() + # write distributions to the archive self.histogram_distributions, self.distributions = [], [] for distribution in self.simple_distributions: + # set binning if distribution.type == 'distances': - bins = np.arange(0, max(self.geometry_analysis_cutoffs), 0.001) - elif distribution.type == 'angles' or distribution.type == 'dihedrals': + bins = np.arange(0, max(cutoffs), 0.001) # so binning may deviate + elif distribution.type in ['angles', 'dihedrals']: bins = np.arange(0, 180, 0.01) + self.histogram_distributions.append( distribution.produce_histogram(bins) - ) + ) # temporary histogram for manipulation self.distributions.append( self.histogram_distributions[-1].produce_nomad_distribution() - ) + ) # stored distributions return self @@ -896,13 +966,13 @@ def resolve_bulk_symmetry( symmetry['hall_symbol'] = symmetry_analyzer.get_hall_symbol() symmetry['point_group_symbol'] = symmetry_analyzer.get_point_group() symmetry['space_group_number'] = symmetry_analyzer.get_space_group_number() - symmetry[ - 'space_group_symbol' - ] = symmetry_analyzer.get_space_group_international_short() + symmetry['space_group_symbol'] = ( + symmetry_analyzer.get_space_group_international_short() + ) symmetry['origin_shift'] = symmetry_analyzer._get_spglib_origin_shift() - symmetry[ - 'transformation_matrix' - ] = symmetry_analyzer._get_spglib_transformation_matrix() + symmetry['transformation_matrix'] = ( + symmetry_analyzer._get_spglib_transformation_matrix() + ) # Populating the originally parsed AtomicCell wyckoff_letters and equivalent_atoms information original_wyckoff = symmetry_analyzer.get_wyckoff_letters_original() From 4c2bf8cdad5d9e9aec4076a422c441039bd3da20 Mon Sep 17 00:00:00 2001 From: nathan Date: Mon, 27 May 2024 10:57:04 +0200 Subject: [PATCH 28/30] - Remove `check_attrributes` - clean up `to_ase_atoms` --- src/nomad_simulations/model_system.py | 52 +++++++++++---------------- 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 42644284..d627d830 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -50,26 +50,6 @@ from .utils import get_sibling_section, is_not_representative -def check_attributes(attributes: list[str]) -> Optional[Callable]: - """ - Check if the specified attributes are not None. - """ - - def decorator(func: Callable) -> Callable: - def wrapper(self, *args, **kwargs) -> Optional[Callable]: - for attr in attributes: - if attr not in self.all_quantities: # ! verify - self.logger.error( - f'Missing attribute: {attr}.' - ) # require the class to provide a logger - return None - return func(self, *args, **kwargs) - - return wrapper - - return decorator - - class GeometricSpace(Entity): """ A base section used to define geometrical spaces and their entities. @@ -289,7 +269,7 @@ class Cell(GeometricSpace): periodic_boundary_conditions = Quantity( type=bool, shape=[3], - default=[False, False, False], + default=[False, False, False], # when is the default value set? description=""" If periodic boundary conditions are applied to each direction of the crystal axes. """, @@ -711,23 +691,31 @@ class AtomicCell(Cell): """, ) - @check_attributes( - attributes=[ - 'atoms_state', - 'lattice_vectors', - 'positions', - 'periodic_boundary_conditions', - ], - ) - def to_ase_atoms(self): + def to_ase_atoms(self, logger: BoundLogger) -> Optional[ase.Atoms]: """ Generate an `ase.Atoms` object from `AtomicCell`. """ + # check all prerequisites + attributes = ('atoms_states', 'lattice_vectors', 'positions') + for attribute in attributes: + if getattr(self, attribute) is None: + logger.warning(f'Attribute {attribute} not found in AtomicCell.') + return None + + if len(self.atoms_states) != len(positions := self.positions): + logger.warning( + 'Number of atoms states and positions do not match.', + atoms_states=len(self.atoms_states), + positions=len(positions), + ) + return None + + # if all is well, generate the `ase.Atoms` object ase_atoms = ase.Atoms( - symbols=[atom_state.chemical_symbol for atom_state in self.atoms_state], - pbc=self.periodic_boundary_conditions, + symbols=[atom_state.chemical_symbol for atom_state in self.atoms_states], positions=self.positions.to('angstrom').magnitude, cell=self.lattice_vectors.to('angstrom').magnitude, + pbc=self.periodic_boundary_conditions, ) return ase_atoms From b498bd74e3d8c85025908065dc16e6a9eb1d4f38 Mon Sep 17 00:00:00 2001 From: nathan Date: Mon, 27 May 2024 12:04:36 +0200 Subject: [PATCH 29/30] - add bin units in pipeline classes - ensure that `neighbor_list`'s order matches `ase_atoms` --- src/nomad_simulations/model_system.py | 108 ++++++++++++++++---------- 1 file changed, 66 insertions(+), 42 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index d627d830..6278eb68 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -484,33 +484,44 @@ class DistributionHistogram: def __init__( self, combo: list[str], - type: str, # ? replace for automated check? distribution_values: pint.Quantity, - bins: np.ndarray, + bins: pint.Quantity, neighbor_list: pint.Quantity, ) -> None: + """The `combo` is a list of element symbols, `distribution_values` is a list of distances or angles, + `bins` is the binning of the histogram and sets the final units, + and `neighbor_list` is the cutoffs for the distribution (in the same order as combo).""" self.combo = combo - self.type = type + self.bins = bins self.neighbor_list = neighbor_list + self.compute_histogram(distribution_values) + + def compute_histogram(self, distribution_values: pint.Quantity) -> None: + """Compute the histogram of the distribution values, + based on `self.bins` and `distribution_values`. + Results are stored under `self.counts`. Returns the object itself.""" + + # compute the histogram + self.counts, _ = np.histogram( + distribution_values.to(self.bins.u).magnitude, bins=self.bins.magnitude + ) # have the bins determine the units and range # normalize so the minimum is 1 - counts, _bins = np.histogram(distribution_values.magnitude, bins=bins) - self.bins = _bins * distribution_values.u - if len(nonzero_counts := counts[np.where(counts > 0)]): - self.frequency = counts / np.min(nonzero_counts) + if len(nonzero_counts := self.counts[np.where(self.counts > 0)]) > 0: + self.frequency = self.counts / np.min(nonzero_counts) else: - self.frequency = counts + self.frequency = self.counts def produce_nomad_distribution(self) -> GeometryDistribution: # select the constructor based on the type - if self.type == 'distances': - constructor = DistanceGeometryDistribution - elif self.type == 'angles': - constructor = AngleGeometryDistribution - elif self.type == 'dihedrals': - constructor = DihedralGeometryDistribution - elif self.type == 'generic': - constructor = GeometryDistribution + constructor_mapping = { + 'distances': DistanceGeometryDistribution, + 'angles': AngleGeometryDistribution, + 'dihedrals': DihedralGeometryDistribution, + 'generic': GeometryDistribution, + } + if self.type in constructor_mapping: + constructor = constructor_mapping[self.type] else: raise ValueError('Type not recognized.') @@ -519,9 +530,11 @@ def produce_nomad_distribution(self) -> GeometryDistribution: extremity_atom_labels=[self.combo[0], self.combo[-1]], frequency=self.frequency, n_cutoff_specifications=len(self.neighbor_list), - element_cutoff_selection=self.combo, # ! check if this is correct + element_cutoff_selection=self.combo, # this is enforced by `DistributionFactory` distance_cutoffs=self.neighbor_list, ) + + # distances require explicit binning data if self.type == 'distances': geom_dist.n_bins = len(self.bins) geom_dist.bins = self.bins @@ -541,39 +554,40 @@ class Distribution: def __init__( self, combo: list[str], - type: str, ase_atoms: ase.Atoms, neighbor_list: pint.Quantity, ) -> None: + # initialize object attributes self.combo = combo - self.type = type self.neighbor_list = neighbor_list geom_analysis = ase_as.Analysis( ase_atoms, neighbor_list.to('angstrom').magnitude - ) # slightly less optimal - if self.type == 'distances': - if len((matches := geom_analysis.get_bonds(*combo, unique=True))[0]): + ) + if len(combo) == 2: + if len((matches := geom_analysis.get_bonds(*combo, unique=True))[0]) > 0: self.values = geom_analysis.get_values(matches) else: self.values = np.array([]) self.values *= ureg.angstrom - elif self.type == 'angles' or self.type == 'dihedrals': - if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]): - self.values = np.array(geom_analysis.get_values(matches)) + elif len(combo) in (3, 4): + if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]) > 0: + self.values = np.array(geom_analysis.get_values(matches)) * ureg.degrees self.values = np.abs( - np.where(self.values > np.pi, 2 * np.pi - self.values, self.values) - ) # restrict to [0, pi] + np.where( + self.values > 180 * ureg.degrees, + 360 * ureg.degrees - self.values, + self.values, + ) + ) # restrict to [0, 180] degrees else: self.values = np.array([]) - self.values *= ureg.radians + self.values *= ureg.degrees else: - raise ValueError('Type not recognized.') + raise ValueError('Could not infer distribution type from `combo`.') - def produce_histogram(self, bins: np.ndarray) -> DistributionHistogram: - return DistributionHistogram( - self.combo, self.type, self.values, bins, self.neighbor_list - ) + def produce_histogram(self, bins: pint.Quantity) -> DistributionHistogram: + return DistributionHistogram(self.combo, self.values, bins, self.neighbor_list) class DistributionFactory: @@ -583,12 +597,18 @@ class DistributionFactory: """ def __init__(self, ase_atoms: ase.Atoms, neighbor_list: pint.Quantity) -> None: + """The neighbor list should follow the same order as `ase_atoms`.""" + if len(ase_atoms) != len(neighbor_list): + raise ValueError('Length of `ase_atoms` and `neighbor_list` do not match.') + self.ase_atoms = ase_atoms - self.neighbor_list = neighbor_list - self._elements = sorted(list(set(self.ase_atoms.get_chemical_symbols()))) + self.cutoff_mapping = dict(zip(ase_atoms.get_chemical_symbols(), neighbor_list)) + self._elements = sorted( + list(set(self.ase_atoms.get_chemical_symbols())), key=ase.Atom(x).number + ) # sort by atomic number @property - def get_elemental_pairs(self) -> list[list[str]]: + def get_elemental_pairs(self) -> list[tuple[str, str]]: """Generate all possible pairs of element symbols. Permutations are not considered, i.e., (B, A) is converted into (A, B).""" return [ @@ -598,7 +618,7 @@ def get_elemental_pairs(self) -> list[list[str]]: ] @property - def get_elemental_triples_centered(self) -> list[list[str]]: + def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: """Generate all possible triples of element symbols with the center element first. Permutations between the outer elements are not considered, i.e., (A, C, B) is converted into (A, B, C). """ @@ -610,7 +630,7 @@ def get_elemental_triples_centered(self) -> list[list[str]]: ] # matching the order of the `ase` package: https://wiki.fysik.dtu.dk/ase/_modules/ase/geometry/analysis.html#Analysis.get_angles @property - def get_elemental_quadruples_centered(self) -> list[list[str]]: + def get_elemental_quadruples_centered(self) -> list[tuple[str, str, str, str]]: """Generate all possible quadruples of element symbols with the center elements first. Permutations between the outer elements are not considered, i.e., (A, D, B, C) is converted into (A, B, C, D). """ @@ -626,11 +646,15 @@ def produce_distributions(self) -> list[Distribution]: """Generate all possible distributions of distances and angles for all element pairs and triples, respectively.""" return [ - Distribution(combos, type_decl, self.ase_atoms, self.neighbor_list) - for type_decl, combos in zip( - ['distances', 'angles'], - [*self.get_elemental_pairs, *self.get_elemental_triples_centered], + Distribution( + list(combos), + self.ase_atoms, + [self.cutoff_mapping[atom] for atom in combos], ) + for combos in [ + *self.get_elemental_pairs, + *self.get_elemental_triples_centered, + ] ] From 4e92474285a02eee8c5e1dbb343ecc780505f7ba Mon Sep 17 00:00:00 2001 From: nathan Date: Mon, 27 May 2024 12:45:13 +0200 Subject: [PATCH 30/30] - Fix `mypy` errors - Remove `type` in favor of `n_elem` - Reorganize `AtomicCell.normalize()` --- src/nomad_simulations/model_system.py | 76 ++++++++++++++++----------- tests/test_system.py | 6 +-- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/src/nomad_simulations/model_system.py b/src/nomad_simulations/model_system.py index 6278eb68..ae570e5d 100644 --- a/src/nomad_simulations/model_system.py +++ b/src/nomad_simulations/model_system.py @@ -373,16 +373,20 @@ def get_bins(self) -> np.ndarray: Fall back to a default value if none are present.""" return self.bins - def sort_cutoffs(self, sort_key: Callable = lambda x: ase.Atom(x).number) -> None: + def sort_cutoffs(self, sort_key: Callable = lambda x: ase.Atom(x).number): """Sort the cutoffs and the elements accordingly. Args: sort_key (Callable): A function to sort the elements. Defaults to the atomic number. Returns: GeometryDistribution: The sorted geometry distribution object itself. """ - sorted_indices = np.argsort(self.element_cutoff_selection, key=sort_key) + sorted_indices = np.argsort( + [sort_key(elem) for elem in self.element_cutoff_selection] + ) # get the indices of the atomic numbers self.distance_cutoffs = self.distance_cutoffs[sorted_indices] - self.element_cutoff_selection = self.element_cutoff_selection[sorted_indices] + self.element_cutoff_selection = [ + self.element_cutoff_selection[i] for i in sorted_indices + ] return self def normalize(self, archive, logger: BoundLogger): @@ -492,6 +496,7 @@ def __init__( `bins` is the binning of the histogram and sets the final units, and `neighbor_list` is the cutoffs for the distribution (in the same order as combo).""" self.combo = combo + self.n_elem = len(combo) self.bins = bins self.neighbor_list = neighbor_list self.compute_histogram(distribution_values) @@ -515,15 +520,14 @@ def compute_histogram(self, distribution_values: pint.Quantity) -> None: def produce_nomad_distribution(self) -> GeometryDistribution: # select the constructor based on the type constructor_mapping = { - 'distances': DistanceGeometryDistribution, - 'angles': AngleGeometryDistribution, - 'dihedrals': DihedralGeometryDistribution, - 'generic': GeometryDistribution, + 2: DistanceGeometryDistribution, + 3: AngleGeometryDistribution, + 3: DihedralGeometryDistribution, } - if self.type in constructor_mapping: - constructor = constructor_mapping[self.type] + if self.n_elem in constructor_mapping: + constructor = constructor_mapping[self.n_elem] else: - raise ValueError('Type not recognized.') + raise ValueError('Cannot assign a constructor based on `self.combos`.') # instantiate the NOMAD distribution geom_dist = constructor( @@ -535,12 +539,12 @@ def produce_nomad_distribution(self) -> GeometryDistribution: ) # distances require explicit binning data - if self.type == 'distances': + if self.n_elem == 2: geom_dist.n_bins = len(self.bins) geom_dist.bins = self.bins # add the central atom labels if they exist - if len(self.combo) > 2: + if self.n_elem > 2: geom_dist.central_atom_labels = self.combo[1:-1] return geom_dist @@ -559,18 +563,19 @@ def __init__( ) -> None: # initialize object attributes self.combo = combo + self.n_elem = len(combo) self.neighbor_list = neighbor_list geom_analysis = ase_as.Analysis( ase_atoms, neighbor_list.to('angstrom').magnitude ) - if len(combo) == 2: + if self.n_elem == 2: # distances if len((matches := geom_analysis.get_bonds(*combo, unique=True))[0]) > 0: self.values = geom_analysis.get_values(matches) else: self.values = np.array([]) self.values *= ureg.angstrom - elif len(combo) in (3, 4): + elif self.n_elem in (3, 4): # (dihedral) angles if len((matches := geom_analysis.get_angles(*combo, unique=True))[0]) > 0: self.values = np.array(geom_analysis.get_values(matches)) * ureg.degrees self.values = np.abs( @@ -604,7 +609,8 @@ def __init__(self, ase_atoms: ase.Atoms, neighbor_list: pint.Quantity) -> None: self.ase_atoms = ase_atoms self.cutoff_mapping = dict(zip(ase_atoms.get_chemical_symbols(), neighbor_list)) self._elements = sorted( - list(set(self.ase_atoms.get_chemical_symbols())), key=ase.Atom(x).number + list(set(self.ase_atoms.get_chemical_symbols())), + key=lambda x: ase.Atom(x).number, ) # sort by atomic number @property @@ -612,7 +618,7 @@ def get_elemental_pairs(self) -> list[tuple[str, str]]: """Generate all possible pairs of element symbols. Permutations are not considered, i.e., (B, A) is converted into (A, B).""" return [ - [atom_1, atom_2] + (atom_1, atom_2) for i, atom_1 in enumerate(self._elements) for atom_2 in self._elements[i:] ] @@ -623,7 +629,7 @@ def get_elemental_triples_centered(self) -> list[tuple[str, str, str]]: Permutations between the outer elements are not considered, i.e., (A, C, B) is converted into (A, B, C). """ return [ - [atom_1, atom_c, atom_2] + (atom_1, atom_c, atom_2) for atom_c in self._elements for i, atom_1 in enumerate(self._elements) for atom_2 in self._elements[i:] @@ -635,7 +641,7 @@ def get_elemental_quadruples_centered(self) -> list[tuple[str, str, str, str]]: Permutations between the outer elements are not considered, i.e., (A, D, B, C) is converted into (A, B, C, D). """ return [ - [atom_1, atom_c, atom_d, atom_2] + (atom_1, atom_c, atom_d, atom_2) for atom_c in self._elements for atom_d in self._elements for i, atom_1 in enumerate(self._elements) @@ -759,7 +765,7 @@ def normalize(self, archive, logger: BoundLogger): if self.analyze_geometry: # set up distributions cutoffs = ase_nl.natural_cutoffs(self.ase_atoms, mult=3.0) * ureg.angstrom - self.simple_distributions = DistributionFactory( + self.plain_distributions = DistributionFactory( self.ase_atoms, ase_nl.build_neighbor_list( self.ase_atoms, @@ -769,20 +775,26 @@ def normalize(self, archive, logger: BoundLogger): ).produce_distributions() # write distributions to the archive - self.histogram_distributions, self.distributions = [], [] - for distribution in self.simple_distributions: + ( + self.histogram_distributions, + self.distance_distributions, + self.angle_distributions, + ) = [], [], [] + for distr in self.plain_distributions: # set binning - if distribution.type == 'distances': - bins = np.arange(0, max(cutoffs), 0.001) # so binning may deviate - elif distribution.type in ['angles', 'dihedrals']: - bins = np.arange(0, 180, 0.01) - - self.histogram_distributions.append( - distribution.produce_histogram(bins) - ) # temporary histogram for manipulation - self.distributions.append( - self.histogram_distributions[-1].produce_nomad_distribution() - ) # stored distributions + if distr.n_elem == 2: + bins = ( + np.arange(0, max(cutoffs), 0.001) * ureg.angstrom + ) # so binning may deviate + hist = distr.produce_histogram(bins) + self.distance_distributions.append( + hist.produce_nomad_distribution() + ) + elif distr.n_elem in (3, 4): + bins = np.arange(0, 180, 0.01) * ureg.degrees + hist = distr.produce_histogram(bins) + self.angle_distributions.append(hist.produce_nomad_distribution()) + self.histogram_distributions.append(hist) return self diff --git a/tests/test_system.py b/tests/test_system.py index 61ca476d..67280587 100644 --- a/tests/test_system.py +++ b/tests/test_system.py @@ -128,7 +128,7 @@ def test_distribution( """ values = np.sort( Distribution( - elements, analysis_type, atomic_cell, setup_neighbor_list(atomic_cell) + elements, atomic_cell, setup_neighbor_list(atomic_cell) ).values.magnitude ) @@ -165,9 +165,7 @@ def test_distribution_histogram( 4. the actual count """ to_type = 'distances' if len(elements) == 2 else 'angles' - dist = Distribution( - elements, to_type, atomic_cell, setup_neighbor_list(atomic_cell) - ) + dist = Distribution(elements, atomic_cell, setup_neighbor_list(atomic_cell)) dh = DistributionHistogram(elements, to_type, dist.values, bins) if len(bins) == 0: # 1.