Skip to content

Commit

Permalink
finalized movement of resolve formulas and expanded tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jrudz committed Jun 4, 2024
1 parent d42d174 commit fed6446
Show file tree
Hide file tree
Showing 5 changed files with 185 additions and 107 deletions.
9 changes: 5 additions & 4 deletions src/nomad_simulations/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,11 @@
from nomad.datamodel.metainfo.annotations import ELNAnnotation
from nomad.datamodel.data import EntryData
from nomad.datamodel.metainfo.basesections import Entity, Activity
from nomad.atomutils import get_composition

from .model_system import ModelSystem
from .model_method import ModelMethod
from .outputs import Outputs
from .utils import is_not_representative
from .utils import is_not_representative, get_composition

class Program(Entity):
"""
Expand Down Expand Up @@ -188,10 +187,11 @@ def resolve_composition_formula(
def set_branch_composition(system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str]) -> None:
if not subsystems:
atom_indices = system.atom_indices if system.atom_indices is not None else []
subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels and len(atom_indices) != 0 else [] # TODO need to add to testing the case where labels and indices are missing
subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels else ['Unknown' for atom in range(len(atom_indices))]
else:
subsystem_labels = [subsystem.branch_label if subsystem.branch_label is not None else "Unknown" for subsystem in subsystems]
system.composition_formula = get_composition(subsystem_labels)
if system.composition_formula is None:
system.composition_formula = get_composition(subsystem_labels)

def traverse_system_recurs(system, atom_labels):
subsystems = system.model_system
Expand Down Expand Up @@ -224,6 +224,7 @@ def normalize(self, archive, logger) -> None:
if len(system_parent.model_system) == 0:
continue
self._set_system_branch_depth(system_parent)

if is_not_representative(system_parent, logger):
return
self.resolve_composition_formula(system_parent, logger)
27 changes: 0 additions & 27 deletions src/nomad_simulations/model_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -984,40 +984,13 @@ def resolve_system_type_and_dimensionality(

return system_type, dimensionality

# def resolve_composition_formula(
# self, logger: BoundLogger
# ) -> None:
# """
# """
# def set_branch_composition(system: ModelSystem, subsystems: List[ModelSystem], atom_labels: List[str]) -> None:
# if not subsystems:
# atom_indices = system.atom_indices if system.atom_indices is not None else []
# subsystem_labels = [np.array(atom_labels)[atom_indices]] if atom_labels and len(atom_indices) != 0 else [] # TODO need to add to testing the case where labels and indices are missing
# else:
# subsystem_labels = [subsystem.branch_label if subsystem.branch_label is not None else "Unknown" for subsystem in subsystems]
# system.composition_formula = get_composition(subsystem_labels)

# def traverse_system_recurs(system, atom_labels):
# subsystems = system.model_system
# set_branch_composition(system, subsystems, atom_labels)
# if subsystems:
# for subsystem in subsystems:
# traverse_system_recurs(subsystem, atom_labels)

# atoms_state = self.cell[0].atoms_state if self.cell is not None else []
# atom_labels = [atom.chemical_symbol for atom in atoms_state] if atoms_state is not None else []
# traverse_system_recurs(self, atom_labels)

def normalize(self, archive, logger) -> None:
super().normalize(archive, logger)

# We don't need to normalize if the system is not representative
if is_not_representative(self, logger):
return

# if self.composition_formula is None:
# self.resolve_composition_formula(logger)

# Extracting ASE Atoms object from the originally parsed AtomicCell section
if self.cell is None or len(self.cell) == 0:
logger.warning(
Expand Down
2 changes: 1 addition & 1 deletion src/nomad_simulations/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .utils import get_sibling_section, RussellSaundersState, is_not_representative
from .utils import get_sibling_section, RussellSaundersState, is_not_representative, get_composition
13 changes: 12 additions & 1 deletion src/nomad_simulations/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
# limitations under the License.
#

import numpy as np
from math import factorial
from typing import Optional
from typing import Optional, List
from structlog.stdlib import BoundLogger

from nomad.datamodel.data import ArchiveSection
Expand Down Expand Up @@ -128,3 +129,13 @@ def is_not_representative(model_system, logger: BoundLogger):
if not model_system.is_representative:
return True
return False

# TODO Either update nomad.atomutils function and remove this one, or remove the one in atomutils if we prefer it here only
def get_composition(children_names: List[str]) -> Optional[str]:
    """
    Generates a generalized "chemical formula" based on the provided list `children_names`,
    with the format X(m)Y(n) for children_names X and Y of quantities m and n, respectively.

    Args:
        children_names (List[str]): The names of the children to be counted. Nested
            array-likes (e.g., a list holding a single array of labels) are flattened
            by `np.unique` before counting.

    Returns:
        (Optional[str]): The formula, with the unique names in the sorted order returned
            by `np.unique`, or `None` if `children_names` is `None` or empty.
    """
    # `np.unique(None)` would count a single `None` object and yield 'None(1)'.
    if children_names is None:
        return None
    names, counts = np.unique(children_names, return_counts=True)
    formula = ''.join(f'{name}({count})' for name, count in zip(names, counts))
    # An empty input produces an empty string; report it as "no formula".
    return formula or None
241 changes: 167 additions & 74 deletions tests/test_model_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,77 +444,6 @@ def test_normalize(self):
assert np.isclose(model_system.elemental_composition[1].atomic_fraction, 1 / 3)


# @pytest.mark.parametrize(
# 'mol_label_list, n_mol_list, atom_labels_list, composition_formula_list',
# [
# (
# ['H20'],
# [3],
# [['H', 'O', 'O']],
# ['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)']
# ), # pure system
# (
# ['H20', 'Methane'],
# [5, 2],
# [['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']],
# ['group_H20(1)group_Methane(1)', 'H20(5)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'Methane(2)', 'C(1)H(4)', 'C(1)H(4)']
# ), # binary mixture
# ],
# )
# def test_system_hierarchy_for_molecules(
# self,
# mol_label_list: List[str],
# n_mol_list: List[int],
# atom_labels_list: List[str],
# composition_formula_list: List[str]
# ):
# """
# Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules.
# """
# #? Does it make sense to test the setting of branch_label or branch_depth?
# model_system = ModelSystem(is_representative=True)
# model_system.branch_label = 'Total System'
# model_system.branch_depth = 0
# atomic_cell = AtomicCell()
# model_system.cell.append(atomic_cell)
# model_system.atom_indices = []
# for (mol_label, n_mol, atom_labels) in zip(mol_label_list, n_mol_list, atom_labels_list):
# # Create a branch in the hierarchy for this molecule type
# model_system_mol_group = ModelSystem(branch_label='group' + mol_label)
# model_system_mol_group.atom_indices = []
# model_system_mol_group.branch_label = f"group_{mol_label}"
# model_system_mol_group.branch_depth = 1
# model_system.model_system.append(model_system_mol_group)
# for _ in range(n_mol):
# # Create a branch in the hierarchy for this molecule
# model_system_mol = ModelSystem(branch_label=mol_label)
# model_system_mol.branch_label = mol_label
# model_system_mol.branch_depth = 2
# model_system_mol_group.model_system.append(model_system_mol)
# # add the corresponding atoms to the global atom list
# for atom_label in atom_labels:
# atomic_cell.atoms_state.append(AtomsState(chemical_symbol = atom_label))
# n_atoms = len(atomic_cell.atoms_state)
# atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms)
# model_system_mol.atom_indices = atom_indices
# model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices)
# model_system.atom_indices = np.append(model_system.atom_indices, atom_indices)

# model_system.normalize(EntryArchive(), logger)

# assert model_system.composition_formula == composition_formula_list[0]
# ctr_comp = 1
# def get_system_recurs(sec_system, ctr_comp):
# for sys in sec_system:
# assert sys.composition_formula == composition_formula_list[ctr_comp]
# ctr_comp += 1
# sec_subsystem = sys.model_system
# if sec_subsystem:
# ctr_comp = get_system_recurs(sec_subsystem, ctr_comp)
# return ctr_comp

# get_system_recurs(model_system.model_system, ctr_comp)

@pytest.mark.parametrize(
'mol_label_list, n_mol_list, atom_labels_list, composition_formula_list',
[
Expand Down Expand Up @@ -542,10 +471,8 @@ def test_system_hierarchy_for_molecules(
"""
Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules.
"""
simulation = Simulation()
#? Does it make sense to test the setting of branch_label or branch_depth?
model_system = ModelSystem(is_representative=True)
simulation.model_system.append(model_system)
model_system.branch_label = 'Total System'
model_system.branch_depth = 0
atomic_cell = AtomicCell()
Expand Down Expand Up @@ -573,9 +500,175 @@ def test_system_hierarchy_for_molecules(
model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices)
model_system.atom_indices = np.append(model_system.atom_indices, atom_indices)

# model_system.normalize(EntryArchive(), logger)
model_system.normalize(EntryArchive(), logger)

assert model_system.composition_formula == composition_formula_list[0]
ctr_comp = 1
def get_system_recurs(sec_system, ctr_comp):
for sys in sec_system:
assert sys.composition_formula == composition_formula_list[ctr_comp]
ctr_comp += 1
sec_subsystem = sys.model_system
if sec_subsystem:
ctr_comp = get_system_recurs(sec_subsystem, ctr_comp)
return ctr_comp

get_system_recurs(model_system.model_system, ctr_comp)

@pytest.mark.parametrize(
'is_representative, has_atom_indices, mol_label_list, n_mol_list, atom_labels_list, composition_formula_list, custom_formulas',
[
(
True,
True,
['H20'],
[3],
[['H', 'O', 'O']],
['group_H20(1)', 'H20(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
[None, None, None, None, None]
), # pure system
(
False,
True,
['H20'],
[3],
[['H', 'O', 'O']],
[None, None, None, None, None],
[None, None, None, None, None]
), # non-representative system
(
True,
True,
[None],
[3],
[['H', 'O', 'O']],
['Unknown(1)', 'Unknown(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
[None, None, None, None, None]
), # missing branch labels
(
True,
True,
['H20'],
[3],
[[None, None, None]],
['group_H20(1)', 'H20(3)', 'Unknown(3)', 'Unknown(3)', 'Unknown(3)'],
[None, None, None, None, None]
), # missing atom labels
(
True,
False,
['H20'],
[3],
[['H', 'O', 'O']],
['group_H20(1)', 'H20(3)', None, None, None],
[None, None, None, None, None]
), # missing atom indices
(
True,
True,
['H20'],
[3],
[['H', 'O', 'O']],
['waters(1)', 'water_molecules(3)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)'],
['waters(1)', 'water_molecules(3)', None, None, None]
), # custom formulas
(
True,
True,
['H20', 'Methane'],
[5, 2],
[['H', 'O', 'O'], ['C', 'H', 'H', 'H', 'H']],
['group_H20(1)group_Methane(1)', 'H20(5)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'H(1)O(2)', 'Methane(2)', 'C(1)H(4)', 'C(1)H(4)'],
[None, None, None, None, None, None, None, None, None, None]
), # binary mixture
],
)
def test_system_hierarchy_for_molecules(
self,
is_representative: bool,
has_atom_indices: bool,
mol_label_list: List[str],
n_mol_list: List[int],
atom_labels_list: List[str],
composition_formula_list: List[str],
custom_formulas: List[str]
):
"""
Test the `ModelSystem` normalization of 'composition_formula' for atoms and molecules.
Description of test parameters:
is_representative:
Boolean specifying if branch_depth = 0 is representative or not.
If not representative, the composition formulas should not be generated.
has_atom_indices:
Boolean specifying if the atom_indices should be populated during parsing.
Without atom_indices, the composition formulas for the deepest level of the hierarchy
should not be populated.
mol_label_list:
List of molecule types for generating the hierarchy.
n_mol_list:
List of the number of molecules for each molecule type. Should be same
length as mol_label_list.
atom_labels_list:
List of atom labels for each molecule type. Should be same length as
mol_label_list, with each entry being a list of corresponding atom labels.
composition_formula_list:
This is the list of resulting composition formulas after normalization. The
ordering is dictated by the recursive traversing of the hierarchy in get_system_recurs(),
which follows each branch to its deepest level before moving to the next branch, i.e.,
[model_system.composition_formula,
model_system.model_system[0].composition_formula,
model_system.model_system[0].model_system[0].composition_formula,
model_system.model_system[0].model_system[1].composition_formula, ...,
model_system.model_system[1].composition_formula, ...]
custom_formulas:
This is a list of custom composition formulas that can be set in the generation
of the hierarchy, which will cause the normalize to ignore (i.e., not overwrite) these formula entries.
The ordering is as described above.
"""

### Generate the system hierarchy ###
simulation = Simulation()
model_system = ModelSystem(is_representative=True)
simulation.model_system.append(model_system)
model_system.branch_label = 'Total System'
model_system.is_representative = is_representative
model_system.composition_formula = custom_formulas[0]
ctr_comp = 1
atomic_cell = AtomicCell()
model_system.cell.append(atomic_cell)
if has_atom_indices:
model_system.atom_indices = []
for (mol_label, n_mol, atom_labels) in zip(mol_label_list, n_mol_list, atom_labels_list):
# Create a branch in the hierarchy for this molecule type
model_system_mol_group = ModelSystem()
if has_atom_indices:
model_system_mol_group.atom_indices = []
model_system_mol_group.branch_label = f"group_{mol_label}" if mol_label is not None else None
model_system_mol_group.composition_formula = custom_formulas[ctr_comp]
ctr_comp += 1
model_system.model_system.append(model_system_mol_group)
for _ in range(n_mol):
# Create a branch in the hierarchy for this molecule
model_system_mol = ModelSystem(branch_label=mol_label)
model_system_mol.branch_label = mol_label
model_system_mol.composition_formula = custom_formulas[ctr_comp]
ctr_comp += 1
model_system_mol_group.model_system.append(model_system_mol)
# add the corresponding atoms to the global atom list
for atom_label in atom_labels:
if atom_label is not None:
atomic_cell.atoms_state.append(AtomsState(chemical_symbol = atom_label))
n_atoms = len(atomic_cell.atoms_state)
atom_indices = np.arange(n_atoms - len(atom_labels), n_atoms)
if has_atom_indices:
model_system_mol.atom_indices = atom_indices
model_system_mol_group.atom_indices = np.append(model_system_mol_group.atom_indices, atom_indices)
model_system.atom_indices = np.append(model_system.atom_indices, atom_indices)

simulation.normalize(EntryArchive(), logger)

### Traverse the hierarchy recursively and check the results ###
assert model_system.composition_formula == composition_formula_list[0]
ctr_comp = 1
def get_system_recurs(sec_system, ctr_comp):
Expand Down

0 comments on commit fed6446

Please sign in to comment.