diff --git a/doped/generation.py b/doped/generation.py index 445b4227..b0749b50 100644 --- a/doped/generation.py +++ b/doped/generation.py @@ -7,10 +7,12 @@ import logging import operator import warnings +from collections import defaultdict +from collections.abc import Sequence from functools import partial, reduce from itertools import chain from multiprocessing import Pool, cpu_count -from typing import Optional, Union, cast +from typing import Any, Callable, Optional, Union, cast from unittest.mock import MagicMock import numpy as np @@ -22,16 +24,19 @@ InterstitialGenerator, SubstitutionGenerator, VacancyGenerator, - VoronoiInterstitialGenerator, ) -from pymatgen.analysis.defects.utils import get_labeled_inserted_structure -from pymatgen.analysis.structure_matcher import StructureMatcher +from pymatgen.analysis.defects.utils import remove_collisions +from pymatgen.analysis.structure_matcher import ElementComparator, StructureMatcher +from pymatgen.core import Structure from pymatgen.core.composition import Composition, Element from pymatgen.core.periodic_table import DummySpecies -from pymatgen.core.structure import PeriodicSite, Structure +from pymatgen.core.structure import PeriodicSite from pymatgen.entries.computed_entries import ComputedStructureEntry +from pymatgen.io.vasp.sets import get_valid_magmom_struct from pymatgen.transformations.advanced_transformations import CubicSupercellTransformation from pymatgen.util.typing import PathLike +from scipy.cluster.hierarchy import fcluster, linkage +from scipy.spatial.distance import squareform from tabulate import tabulate from tqdm import tqdm @@ -46,6 +51,7 @@ ) from doped.utils import parsing, supercells, symmetry from doped.utils.efficiency import Composition as doped_Composition +from doped.utils.efficiency import DopedTopographyAnalyzer from doped.utils.efficiency import PeriodicSite as doped_PeriodicSite from doped.utils.parsing import reorder_s1_like_s2 from doped.utils.plotting import format_defect_name @@ -1527,95 +1533,10 @@ class (such as ``clustering_tol``, ``stol``, ``min_dist`` etc), or to else: # Generate interstitial sites using Voronoi tessellation - tight_vig = VoronoiInterstitialGenerator( - stol=0.01 - ) # for determining multiplicities of any merged/grouped interstitial sites from - # Voronoi tessellation + structure-matching - - tight_cand_sites_mul_and_equiv_fpos = [ - *tight_vig._get_candidate_sites(self.primitive_structure) - ] - cand_sites_mul_and_equiv_fpos = _get_cand_interstitial_sites_from_tight_sites( - self.primitive_structure, - tight_cand_sites_mul_and_equiv_fpos, - min_dist=self.interstitial_gen_kwargs.get("min_dist", 0.9), - clustering_tol=self.interstitial_gen_kwargs.get("clustering_tol", 0.55), - stol=self.interstitial_gen_kwargs.get("stol", 0.32), + sorted_sites_mul_and_equiv_fpos = get_Voronoi_interstitial_sites( + host_structure=self.primitive_structure, + interstitial_gen_kwargs=self.interstitial_gen_kwargs, ) - structure_matcher = StructureMatcher( - self.interstitial_gen_kwargs.get("ltol", 0.2), - self.interstitial_gen_kwargs.get("stol", 0.3), - self.interstitial_gen_kwargs.get("angle_tol", 5), - ) # pymatgen-analysis-defects default - unique_tight_cand_sites_mul_and_equiv_fpos = [ - cand_site_mul_and_equiv_fpos - for cand_site_mul_and_equiv_fpos in tight_cand_sites_mul_and_equiv_fpos - if cand_site_mul_and_equiv_fpos not in cand_sites_mul_and_equiv_fpos - ] - - # structure-match the non-matching site & multiplicity tuples, and return the site & - # multiplicity of the tuple with the lower multiplicity (i.e. higher symmetry site) - output_sites_mul_and_equiv_fpos = [] - for cand_site_mul_and_equiv_fpos in cand_sites_mul_and_equiv_fpos: - matching_sites_mul_and_equiv_fpos = [] - if cand_site_mul_and_equiv_fpos not in tight_cand_sites_mul_and_equiv_fpos: - for ( - tight_cand_site_mul_and_equiv_fpos - ) in unique_tight_cand_sites_mul_and_equiv_fpos: - interstitial_struct = self.primitive_structure.copy() - interstitial_struct.insert( - 0, "H", cand_site_mul_and_equiv_fpos[0], coords_are_cartesian=False - ) - tight_interstitial_struct = self.primitive_structure.copy() - tight_interstitial_struct.insert( - 0, - "H", - tight_cand_site_mul_and_equiv_fpos[0], - coords_are_cartesian=False, - ) - if structure_matcher.fit(interstitial_struct, tight_interstitial_struct): - matching_sites_mul_and_equiv_fpos += [ - tight_cand_site_mul_and_equiv_fpos - ] - - # take the site with the lower multiplicity (higher symmetry). If multiplicities - # equal, then take site with larger distance to host atoms (then most ideal site - # according to symmetry._frac_coords_sort_func if also equal): - output_sites_mul_and_equiv_fpos.append( - min( - [cand_site_mul_and_equiv_fpos, *matching_sites_mul_and_equiv_fpos], - key=lambda cand_site_mul_and_equiv_fpos: ( - cand_site_mul_and_equiv_fpos[1], - # distance to nearest host atom (and invert so max -> min for sorting) - 1 - / ( - np.min( - self.primitive_structure.lattice.get_all_distances( - cand_site_mul_and_equiv_fpos[0], - self.primitive_structure.frac_coords, - ), - axis=1, - ) - ), - # return the minimum _frac_coords_sort_func for all equiv fpos: - *symmetry._frac_coords_sort_func( - sorted( - cand_site_mul_and_equiv_fpos[2], - key=symmetry._frac_coords_sort_func, - )[0] - ), - ), - ) - ) - - sorted_sites_mul_and_equiv_fpos = [] - for _cand_site, multiplicity, equiv_fpos in output_sites_mul_and_equiv_fpos: - # take site with equiv_fpos sorted by symmetry._frac_coords_sort_func: - sorted_equiv_fpos = sorted(equiv_fpos, key=symmetry._frac_coords_sort_func) - ideal_cand_site = sorted_equiv_fpos[0] - sorted_sites_mul_and_equiv_fpos.append( - (ideal_cand_site, multiplicity, sorted_equiv_fpos) - ) self.defects["interstitials"] = [] ig = InterstitialGenerator(self.interstitial_gen_kwargs.get("min_dist", 0.9)) @@ -2410,25 +2331,127 @@ def _sort_defects(defects_dict: dict, element_list: Optional[list[str]] = None): } -def _get_cand_interstitial_sites_from_tight_sites( - host_structure, tight_cand_sites_mul_and_equiv_fpos, min_dist=0.9, clustering_tol=0.55, stol=0.32 -): +def get_Voronoi_interstitial_sites( + host_structure: Structure, interstitial_gen_kwargs: Optional[dict[str, Any]] = None +) -> list: """ - Refactored from pymatgen.analysis.defects.generators to avoid recomputing - interstitial sites, when using looser stol / clustering / min_dist choices. + Generate candidate interstitial sites using Voronoi analysis. + + This function uses a similar approach to that in + ``VoronoiInterstitialGenerator`` from ``pymatgen-analysis-defects``, + but with modifications to make interstitial generation much faster, + fix bugs with interstitial grouping (which could lead to undesired + dropping of candidate sites) and achieve better control over site + placement in order to favour sites which are higher-symmetry and + furthest from the host lattice atoms (typically the most favourable + interstitial sites). + + The logic for picking interstitial sites is as follows: + - Generate all candidate sites using (efficient) Voronoi analysis + - Remove any sites which are within ``min_dist`` of any host atoms + - Cluster the remaining sites using a tolerance of ``clustering_tol`` + and symmetry-preference of ``symmetry_preference`` + (see ``_doped_cluster_frac_coords``) + - Determine the multiplicities and symmetry-equivalent coordinates of + the clustered sites using tight structure matching (with ``tight_stol``). + - Group the clustered sites by symmetry using (looser) structure matching + as controlled by ``stol``. + - From each group, pick the site with the highest symmetry and furthest + distance from the host atoms, if its ``min_dist`` is no more than + ``symmetry_preference`` (0.1 Å by default) smaller than the site with + the largest ``min_dist`` (to the host atoms). + (Parameters mentioned here can be supplied via ``interstitial_gen_kwargs`` + as noted in the args section below.) + + The motivation for favouring high symmetry interstitial sites and then + distance to host atoms is because higher symmetry interstitial sites + are typically the more intuitive sites for placement, cleaner, easier + for analysis etc, and interstitials are often lowest-energy when + furthest from host atoms (i.e. in the largest interstitial voids -- + particularly for fully-ionised charge states), and so this approach + tries to strike a balance between these two goals. + + One caveat to this preference for high symmetry interstitial sites, is + that they can also be slightly more prone to being stuck in local minima + on the PES, and so as always it is **highly recommended** to use + ``ShakeNBreak`` or another structure-searching technique to account for + symmetry-breaking when performing defect relaxations! + + Args: + host_structure (Structure): Host structure. + interstitial_gen_kwargs (dict): + Keyword arguments for interstitial generation. Supported kwargs are: + + - min_dist (float): + Minimum distance from host atoms for interstitial sites. + Defaults to 0.9 Å. + - clustering_tol (float): + Tolerance for clustering interstitial sites. Defaults to 0.55 Å. + - symmetry_preference (float): + Symmetry preference for interstitial site selection. Defaults to 0.1 Å. + - stol (float): + Structure matcher tolerance for looser site matching. Defaults to 0.32. + - tight_stol (float): + Structure matcher tolerance for tighter site matching. Defaults to 0.02. + + Returns: + list: List of interstitial sites as fractional coordinates """ - sm = StructureMatcher(stol=stol) + # so we want to pick the higher symmetry sites because it's cleaner, more intuitive etc + # but, this is probably more likely to be stuck in local minima, compared to the (nearby) + # lower symmetry interstitial sites... but this would at least be avoided by rattling I guess + # so we should pick the high symmetry sites, but make sure we have clear warnings/notes in docs + # that if SnB is being skipped (bad practice), at the very least you should rattle defect structures! + # (which we can quickly add as an option in doped.vasp?) -- TODO!! + + interstitial_gen_kwargs = interstitial_gen_kwargs or {} + if any( # check interstitial_gen_kwargs and warn if any missing: + i not in ["min_dist", "clustering_tol", "symmetry_preference", "stol", "tight_stol"] + for i in interstitial_gen_kwargs + ): + raise ValueError("Invalid interstitial_gen_kwargs supplied!") # TODO: Update to match test + top = DopedTopographyAnalyzer(host_structure) + + interstitial_merging_sm = StructureMatcher( + interstitial_gen_kwargs.get("stol", 0.32), + comparator=ElementComparator(), + ) # matches pymatgen-analysis-defects default + tight_sm = StructureMatcher(interstitial_gen_kwargs.get("tight_stol", 0.02)) + + sites_list = [v.frac_coords for v in top.vnodes] + sites_list = remove_collisions( + sites_list, structure=host_structure, min_dist=interstitial_gen_kwargs.get("min_dist", 0.9) + ) + sites: np.array = _doped_cluster_frac_coords( + sites_list, + host_structure, + tol=interstitial_gen_kwargs.get("clustering_tol", 0.55), + symmetry_preference=interstitial_gen_kwargs.get("symmetry_preference", 0.1), + ) + + def _get_inserted_structs(frac_coords_list: list, structure: Structure): + inserted_structs = [] + for fpos in frac_coords_list: + tmp_struct = structure.copy() + get_valid_magmom_struct(tmp_struct, inplace=True, spin_mode="none") + tmp_struct.insert( + 0, + "X", + fpos, + ) + tmp_struct.sort() + inserted_structs.append(tmp_struct) + return inserted_structs + + # Group the candidate sites by symmetry insert_sites = {} multiplicity: dict[int, int] = {} equiv_fpos: dict[int, list[list[float]]] = {} - for fpos, lab in get_labeled_inserted_structure( - [i[0] for i in tight_cand_sites_mul_and_equiv_fpos], - host_structure, - working_ion="X", - min_dist=min_dist, - clustering_tol=clustering_tol, - sm=sm, - ): + inserted_structs = _get_inserted_structs(sites, host_structure) + + # Label the groups by tight structure matching first: + tight_site_labels = _generic_group_labels(inserted_structs, comp=tight_sm.fit) + for fpos, lab in [*zip(sites.tolist(), tight_site_labels)]: if lab in insert_sites: multiplicity[lab] += 1 equiv_fpos[lab].append(fpos) @@ -2437,14 +2460,211 @@ def _get_cand_interstitial_sites_from_tight_sites( multiplicity[lab] = 1 equiv_fpos[lab] = [fpos] - cand_sites_mul_and_equiv_fpos_from_tight = [] - for key in insert_sites: - matching_tight_cand_sites = [ - i for i in tight_cand_sites_mul_and_equiv_fpos if i[0] in equiv_fpos[key] - ] - all_equiv_sites = [site for i in matching_tight_cand_sites for site in i[2]] - cand_sites_mul_and_equiv_fpos_from_tight.append( - (insert_sites[key], len(all_equiv_sites), all_equiv_sites) - ) + tight_cand_site_mul_and_equiv_fpos_list = [ + (insert_sites[key], multiplicity[key], equiv_fpos[key]) for key in insert_sites + ] + reduced_inserted_structs = _get_inserted_structs( + [insert_sites[k] for k in insert_sites], host_structure + ) + + looser_site_matched_dict = defaultdict(list) + for tight_cand_site_mul_and_equiv_fpos, label in zip( + tight_cand_site_mul_and_equiv_fpos_list, + _generic_group_labels(reduced_inserted_structs, comp=interstitial_merging_sm.fit), + ): + looser_site_matched_dict[label].append(tight_cand_site_mul_and_equiv_fpos) + + cand_site_mul_and_equiv_fpos_list = [] + symmetry_preference = interstitial_gen_kwargs.get("symmetry_preference", 0.1) + for tight_cand_site_mul_and_equiv_fpos_sublist in looser_site_matched_dict.values(): + if len(tight_cand_site_mul_and_equiv_fpos_sublist) == 1: + cand_site_mul_and_equiv_fpos_list.append(tight_cand_site_mul_and_equiv_fpos_sublist[0]) + + else: # pick site with the highest symmetry and furthest distance from host atoms: + site_scores = [ + ( + cand_site_mul_and_equiv_fpos[1], # multiplicity (lower is higher symmetry) + -np.min( # distance to nearest host atom (minus; so max -> min for sorting) + host_structure.lattice.get_all_distances( + cand_site_mul_and_equiv_fpos[0], host_structure.frac_coords + ), + axis=1, + ), + *symmetry._frac_coords_sort_func( + sorted(cand_site_mul_and_equiv_fpos[2], key=symmetry._frac_coords_sort_func)[0] + ), + ) + for cand_site_mul_and_equiv_fpos in tight_cand_site_mul_and_equiv_fpos_sublist + ] + symmetry_favoured_site_mul_and_equiv_fpos = tight_cand_site_mul_and_equiv_fpos_sublist[ + site_scores.index(min(site_scores)) + ] + dist_favoured_reordered_score = min( + [(score[1], score[0], *score[2:]) for score in site_scores] + ) + dist_favoured_site_mul_and_equiv_fpos = tight_cand_site_mul_and_equiv_fpos_sublist[ + site_scores.index( + ( + dist_favoured_reordered_score[1], + dist_favoured_reordered_score[0], + *dist_favoured_reordered_score[2:], + ) + ) + ] + + cand_site_mul_and_equiv_fpos_list.append( + dist_favoured_site_mul_and_equiv_fpos + if ( + np.min( + host_structure.lattice.get_all_distances( + dist_favoured_site_mul_and_equiv_fpos[0], host_structure.frac_coords + ), + axis=1, + ) + < np.min( + host_structure.lattice.get_all_distances( + symmetry_favoured_site_mul_and_equiv_fpos[0], host_structure.frac_coords + ), + axis=1, + ) + - symmetry_preference + ) + else symmetry_favoured_site_mul_and_equiv_fpos + ) + + sorted_sites_mul_and_equiv_fpos = [] + for _cand_site, multiplicity, equiv_fpos in cand_site_mul_and_equiv_fpos_list: # type: ignore + # take site with equiv_fpos sorted by symmetry._frac_coords_sort_func: + sorted_equiv_fpos = sorted(equiv_fpos, key=symmetry._frac_coords_sort_func) + ideal_cand_site = sorted_equiv_fpos[0] + sorted_sites_mul_and_equiv_fpos.append((ideal_cand_site, multiplicity, sorted_equiv_fpos)) + + return sorted_sites_mul_and_equiv_fpos + + +def _doped_cluster_frac_coords( + fcoords: np.typing.ArrayLike, + structure: Structure, + tol: float = 0.55, + symmetry_preference: float = 0.1, +) -> np.typing.NDArray: + """ + Cluster fractional coordinates that are within a certain distance tolerance + of each other, and return the cluster site. + + Modified from the ``pymatgen-analysis-defects``` function as follows: + For each site cluster, the possible sites to choose from are the sites + in the cluster _and_ the cluster midpoint (average position). Of these + sites, the site with the highest symmetry, and then largest ``min_dist`` + (distance to any host lattice site), is chosen -- if its ``min_dist`` is + no more than ``symmetry_preference`` (0.1 Å by default) smaller than + the site with the largest ``min_dist``. This is because we want to favour + the higher symmetry interstitial sites (as these are typically the more + intuitive sites for placement, cleaner, easier for analysis etc, and work + well when combined with ``ShakeNBreak`` or other structure-searching + techniques to account for symmetry-breaking), but also interstitials are + often lowest-energy when furthest from host atoms (i.e. in the largest + interstitial voids -- particularly for fully-ionised charge states), and + so this approach tries to strike a balance between these two goals. + + In ``pymatgen-analysis-defects``, the average cluster position is used, + which breaks symmetries and is less easy to manipulate in the following + interstitial generation functions. + + Args: + fcoords (npt.ArrayLike): Fractional coordinates of points to cluster. + structure (Structure): The host structure. + tol (float): + A distance tolerance for clustering Voronoi nodes. Default is 0.55 Å. + symmetry_preference (float): + A distance preference for symmetry over minimum distance to host atoms, + as detailed in docstring above. + Default is 0.1 Å. + + Returns: + np.typing.NDArray: Clustered fractional coordinates + """ + if len(fcoords) <= 1: + return None + + lattice = structure.lattice + sga = symmetry.get_sga(structure, symprec=0.1) # for getting symmetries of different sites + symm_ops = sga.get_symmetry_operations() # fractional symm_ops by default + dist_matrix = np.array(lattice.get_all_distances(fcoords, fcoords)) + dist_matrix = (dist_matrix + dist_matrix.T) / 2 + + for i in range(len(dist_matrix)): + dist_matrix[i, i] = 0 + condensed_m = squareform(dist_matrix) + z = linkage(condensed_m) + cn = fcluster(z, tol, criterion="distance") + unique_fcoords = [] + + for n in set(cn): + frac_coords = [] + for i, j in enumerate(np.where(cn == n)[0]): + if i == 0: + frac_coords.append(fcoords[j]) + else: + fcoord = fcoords[j] # We need the image to combine the frac_coords properly: + d, image = lattice.get_distance_and_image(frac_coords[0], fcoord) + frac_coords.append(fcoord + image) + + frac_coords.append(np.average(frac_coords, axis=0)) # midpoint of cluster + frac_coords_scores = { + tuple(x): ( + -symmetry.group_order_from_schoenflies( + symmetry.point_symmetry_from_site(x, structure, symm_ops=symm_ops) + ), # higher order = higher symmetry + -np.min(lattice.get_all_distances(x, structure.frac_coords), axis=1), + *symmetry._frac_coords_sort_func(x), + ) + for x in frac_coords + } + symmetry_favoured_site = sorted(frac_coords_scores.items(), key=lambda x: x[1])[0][0] + dist_favoured_site = sorted( + frac_coords_scores.items(), key=lambda x: (x[1][1], x[1][0], *x[1][2:]) + )[0][0] + + if ( + np.min(lattice.get_all_distances(dist_favoured_site, structure.frac_coords), axis=1) + < np.min(lattice.get_all_distances(symmetry_favoured_site, structure.frac_coords), axis=1) + - symmetry_preference + ): + unique_fcoords.append(dist_favoured_site) + else: # prefer symmetry over distance if difference is sufficiently small + unique_fcoords.append(symmetry_favoured_site) + + return symmetry._vectorized_custom_round( + np.mod(symmetry._vectorized_custom_round(unique_fcoords, 5), 1), 4 + ) # to unit cell + + +def _generic_group_labels(list_in: Sequence, comp: Callable = operator.eq) -> list[int]: + """ + Group a list of unsortable objects. + + Templated off the ``pymatgen-analysis-defects`` function, + but fixed to avoid broken reassignment logic and overwriting + of labels (resulting in sites being incorrectly dropped). + + Args: + list_in: A list of objects to group using ``comp``. + comp: Comparator function. + + Returns: + list[int]: list of labels for the input list + """ + list_out = [-1] * len(list_in) # Initialize with -1 instead of None for clarity + label_num = 0 + + for i1 in range(len(list_in)): + if list_out[i1] != -1: # Already labeled + continue + list_out[i1] = label_num + for i2 in range(i1 + 1, len(list_in)): + if list_out[i2] == -1 and comp(list_in[i1], list_in[i2]): + list_out[i2] = label_num + label_num += 1 - return cand_sites_mul_and_equiv_fpos_from_tight + return list_out diff --git a/doped/utils/efficiency.py b/doped/utils/efficiency.py index 3fd82853..90667585 100644 --- a/doped/utils/efficiency.py +++ b/doped/utils/efficiency.py @@ -90,8 +90,6 @@ def __init__( max_cell_range: int = 1, constrained_c_frac: float = 0.5, thickness: float = 0.5, - clustering_tol: float = 0.5, - min_dist: float = 0.9, ) -> None: """ Args: @@ -115,17 +113,12 @@ def __init__( thickness (float): Along with constrained_c_frac, limit the thickness of the regions where we want to explore. Default is 0.5, which is mapping all the site of the unit cell. - clustering_tol (float): Tolerance for clustering nodes. Default is - 0.5. - min_dist (float): Minimum distance between nodes. Default is 0.9. """ # if input cell is very small (< 5 atoms) and max cell range is 1 (default), bump to 2 for # accurate Voronoi tessellation: if len(structure) < 5 and max_cell_range == 1: max_cell_range = 2 - self.clustering_tol = clustering_tol - self.min_dist = min_dist self.structure = structure.copy() self.structure.remove_oxidation_states() diff --git a/tests/test_generation.py b/tests/test_generation.py index 25584f27..515acfd4 100644 --- a/tests/test_generation.py +++ b/tests/test_generation.py @@ -195,29 +195,29 @@ def setUp(self): Interstitials Guessed Charges Conv. Cell Coords Wyckoff --------------- --------------------- ------------------- --------- -Y_i_C2v [+3,+2,+1,0] [0.000,0.500,0.184] 8g +Y_i_C2v [+3,+2,+1,0] [0.000,0.500,0.185] 8g Y_i_C4v_O2.68 [+3,+2,+1,0] [0.000,0.000,0.485] 4e Y_i_C4v_Y1.92 [+3,+2,+1,0] [0.000,0.000,0.418] 4e -Y_i_Cs_S1.71 [+3,+2,+1,0] [0.180,0.180,0.143] 16m Y_i_Cs_Ti1.95 [+3,+2,+1,0] [0.325,0.325,0.039] 16m +Y_i_Cs_Y1.71 [+3,+2,+1,0] [0.191,0.191,0.144] 16m Y_i_D2d [+3,+2,+1,0] [0.000,0.500,0.250] 4d -Ti_i_C2v [+4,+3,+2,+1,0] [0.000,0.500,0.184] 8g +Ti_i_C2v [+4,+3,+2,+1,0] [0.000,0.500,0.185] 8g Ti_i_C4v_O2.68 [+4,+3,+2,+1,0] [0.000,0.000,0.485] 4e Ti_i_C4v_Y1.92 [+4,+3,+2,+1,0] [0.000,0.000,0.418] 4e -Ti_i_Cs_S1.71 [+4,+3,+2,+1,0] [0.180,0.180,0.143] 16m Ti_i_Cs_Ti1.95 [+4,+3,+2,+1,0] [0.325,0.325,0.039] 16m +Ti_i_Cs_Y1.71 [+4,+3,+2,+1,0] [0.191,0.191,0.144] 16m Ti_i_D2d [+4,+3,+2,+1,0] [0.000,0.500,0.250] 4d -S_i_C2v [+4,+3,+2,+1,0,-1,-2] [0.000,0.500,0.184] 8g +S_i_C2v [+4,+3,+2,+1,0,-1,-2] [0.000,0.500,0.185] 8g S_i_C4v_O2.68 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.485] 4e S_i_C4v_Y1.92 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.418] 4e -S_i_Cs_S1.71 [+4,+3,+2,+1,0,-1,-2] [0.180,0.180,0.143] 16m S_i_Cs_Ti1.95 [+4,+3,+2,+1,0,-1,-2] [0.325,0.325,0.039] 16m +S_i_Cs_Y1.71 [+4,+3,+2,+1,0,-1,-2] [0.191,0.191,0.144] 16m S_i_D2d [+4,+3,+2,+1,0,-1,-2] [0.000,0.500,0.250] 4d -O_i_C2v [0,-1,-2] [0.000,0.500,0.184] 8g +O_i_C2v [0,-1,-2] [0.000,0.500,0.185] 8g O_i_C4v_O2.68 [0,-1,-2] [0.000,0.000,0.485] 4e O_i_C4v_Y1.92 [0,-1,-2] [0.000,0.000,0.418] 4e -O_i_Cs_S1.71 [0,-1,-2] [0.180,0.180,0.143] 16m O_i_Cs_Ti1.95 [0,-1,-2] [0.325,0.325,0.039] 16m +O_i_Cs_Y1.71 [0,-1,-2] [0.191,0.191,0.144] 16m O_i_D2d [0,-1,-2] [0.000,0.500,0.250] 4d \n""" "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in the " @@ -256,32 +256,32 @@ def setUp(self): O_Mn [0,-1,-2,-3,-4,-5,-6] [0.121,0.129,0.625] 12d O_Ni [0,-1,-2,-3,-4] [0.625,0.625,0.625] 4b -Interstitials Guessed Charges Conv. Cell Coords Wyckoff -------------------- ----------------- ------------------- --------- -Li_i_C1_O1.72 [+1,0] [0.248,0.480,0.249] 24e -Li_i_C1_O1.78 [+1,0] [0.017,0.261,0.250] 24e -Li_i_C2_Li1.84O1.84 [+1,0] [0.073,0.177,0.125] 12d -Li_i_C2_Li1.84O1.94 [+1,0] [0.151,0.375,0.401] 12d -Li_i_C2_Li1.86 [+1,0] [0.086,0.375,0.336] 12d -Li_i_C3 [+1,0] [0.497,0.497,0.497] 8c -Mn_i_C1_O1.72 [+4,+3,+2,+1,0] [0.248,0.480,0.249] 24e -Mn_i_C1_O1.78 [+4,+3,+2,+1,0] [0.017,0.261,0.250] 24e -Mn_i_C2_Li1.84O1.84 [+4,+3,+2,+1,0] [0.073,0.177,0.125] 12d -Mn_i_C2_Li1.84O1.94 [+4,+3,+2,+1,0] [0.151,0.375,0.401] 12d -Mn_i_C2_Li1.86 [+4,+3,+2,+1,0] [0.086,0.375,0.336] 12d -Mn_i_C3 [+4,+3,+2,+1,0] [0.497,0.497,0.497] 8c -Ni_i_C1_O1.72 [+4,+3,+2,+1,0] [0.248,0.480,0.249] 24e -Ni_i_C1_O1.78 [+4,+3,+2,+1,0] [0.017,0.261,0.250] 24e -Ni_i_C2_Li1.84O1.84 [+4,+3,+2,+1,0] [0.073,0.177,0.125] 12d -Ni_i_C2_Li1.84O1.94 [+4,+3,+2,+1,0] [0.151,0.375,0.401] 12d -Ni_i_C2_Li1.86 [+4,+3,+2,+1,0] [0.086,0.375,0.336] 12d -Ni_i_C3 [+4,+3,+2,+1,0] [0.497,0.497,0.497] 8c -O_i_C1_O1.72 [0,-1,-2] [0.248,0.480,0.249] 24e -O_i_C1_O1.78 [0,-1,-2] [0.017,0.261,0.250] 24e -O_i_C2_Li1.84O1.84 [0,-1,-2] [0.073,0.177,0.125] 12d -O_i_C2_Li1.84O1.94 [0,-1,-2] [0.151,0.375,0.401] 12d -O_i_C2_Li1.86 [0,-1,-2] [0.086,0.375,0.336] 12d -O_i_C3 [0,-1,-2] [0.497,0.497,0.497] 8c +Interstitials Guessed Charges Conv. Cell Coords Wyckoff +--------------- ----------------- ------------------- --------- +Li_i_C1_Ni1.82 [+1,0] [0.021,0.278,0.258] 24e +Li_i_C1_O1.78 [+1,0] [0.233,0.492,0.492] 24e +Li_i_C2_Li1.84 [+1,0] [0.073,0.177,0.125] 12d +Li_i_C2_Li1.87 [+1,0] [0.161,0.375,0.410] 12d +Li_i_C2_Li1.90 [+1,0] [0.074,0.375,0.324] 12d +Li_i_C3 [+1,0] [0.497,0.497,0.497] 8c +Mn_i_C1_Ni1.82 [+4,+3,+2,+1,0] [0.021,0.278,0.258] 24e +Mn_i_C1_O1.78 [+4,+3,+2,+1,0] [0.233,0.492,0.492] 24e +Mn_i_C2_Li1.84 [+4,+3,+2,+1,0] [0.073,0.177,0.125] 12d +Mn_i_C2_Li1.87 [+4,+3,+2,+1,0] [0.161,0.375,0.410] 12d +Mn_i_C2_Li1.90 [+4,+3,+2,+1,0] [0.074,0.375,0.324] 12d +Mn_i_C3 [+4,+3,+2,+1,0] [0.497,0.497,0.497] 8c +Ni_i_C1_Ni1.82 [+4,+3,+2,+1,0] [0.021,0.278,0.258] 24e +Ni_i_C1_O1.78 [+4,+3,+2,+1,0] [0.233,0.492,0.492] 24e +Ni_i_C2_Li1.84 [+4,+3,+2,+1,0] [0.073,0.177,0.125] 12d +Ni_i_C2_Li1.87 [+4,+3,+2,+1,0] [0.161,0.375,0.410] 12d +Ni_i_C2_Li1.90 [+4,+3,+2,+1,0] [0.074,0.375,0.324] 12d +Ni_i_C3 [+4,+3,+2,+1,0] [0.497,0.497,0.497] 8c +O_i_C1_Ni1.82 [0,-1,-2] [0.021,0.278,0.258] 24e +O_i_C1_O1.78 [0,-1,-2] [0.233,0.492,0.492] 24e +O_i_C2_Li1.84 [0,-1,-2] [0.073,0.177,0.125] 12d +O_i_C2_Li1.87 [0,-1,-2] [0.161,0.375,0.410] 12d +O_i_C2_Li1.90 [0,-1,-2] [0.074,0.375,0.324] 12d +O_i_C3 [0,-1,-2] [0.497,0.497,0.497] 8c \n""" "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in the " "conventional ('conv.') unit cell, which comprises 4 formula unit(s) of Li2Mn3NiO8.\n" @@ -435,8 +435,6 @@ def setUp(self): Cd_i_C3v_Cd2.83Te3.27Cd5.42b [+2,+1,0] [0.000,0.000,0.917] 3a Cd_i_C3v_Te2.83Cd3.27Te5.42a [+2,+1,0] [0.000,0.000,0.167] 3a Cd_i_C3v_Te2.83Cd3.27Te5.42b [+2,+1,0] [0.000,0.000,0.833] 3a -Cd_i_Cs_Cd2.59Cd2.65 [+2,+1,0] [0.069,0.139,0.483] 9b -Cd_i_Cs_Cd2.59Te2.65 [+2,+1,0] [0.139,0.069,0.600] 9b Cd_i_Cs_Cd2.71Te2.71Cd4.25a [+2,+1,0] [0.500,0.500,0.042] 9b Cd_i_Cs_Cd2.71Te2.71Cd4.25b [+2,+1,0] [0.500,0.500,0.375] 9b Cd_i_Cs_Cd2.71Te2.71Cd4.25c [+2,+1,0] [0.500,0.500,0.708] 9b @@ -480,8 +478,6 @@ def setUp(self): Te_i_C3v_Cd2.83Te3.27Cd5.42b [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.917] 3a Te_i_C3v_Te2.83Cd3.27Te5.42a [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.167] 3a Te_i_C3v_Te2.83Cd3.27Te5.42b [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.833] 3a -Te_i_Cs_Cd2.59Cd2.65 [+4,+3,+2,+1,0,-1,-2] [0.069,0.139,0.483] 9b -Te_i_Cs_Cd2.59Te2.65 [+4,+3,+2,+1,0,-1,-2] [0.139,0.069,0.600] 9b Te_i_Cs_Cd2.71Te2.71Cd4.25a [+4,+3,+2,+1,0,-1,-2] [0.500,0.500,0.042] 9b Te_i_Cs_Cd2.71Te2.71Cd4.25b [+4,+3,+2,+1,0,-1,-2] [0.500,0.500,0.375] 9b Te_i_Cs_Cd2.71Te2.71Cd4.25c [+4,+3,+2,+1,0,-1,-2] [0.500,0.500,0.708] 9b @@ -639,7 +635,77 @@ def setUp(self): self.zn3p2 = Structure.from_file(f"{self.data_dir}/Zn3P2_POSCAR") self.sb2se3 = Structure.from_file(f"{self.data_dir}/Sb2Se3_bulk_supercell_POSCAR") + self.sb2se3_defect_gen_info = ( + """Vacancies Guessed Charges Conv. Cell Coords Wyckoff +-------------- ----------------- ------------------- --------- +v_Sb_Cs_Se2.57 [+1,0,-1,-2,-3] [0.537,0.250,0.355] 4c +v_Sb_Cs_Se2.63 [+1,0,-1,-2,-3] [0.328,0.250,0.032] 4c +v_Se_Cs_Sb2.57 [+2,+1,0,-1] [0.628,0.250,0.553] 4c +v_Se_Cs_Sb2.63 [+2,+1,0,-1] [0.192,0.250,0.210] 4c +v_Se_Cs_Sb2.65 [+2,+1,0,-1] [0.445,0.750,0.128] 4c + +Substitutions Guessed Charges Conv. Cell Coords Wyckoff +--------------- --------------------------- ------------------- --------- +Sb_Se_Cs_Sb2.57 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.628,0.250,0.553] 4c +Sb_Se_Cs_Sb2.63 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.192,0.250,0.210] 4c +Sb_Se_Cs_Sb2.65 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.445,0.750,0.128] 4c +Se_Sb_Cs_Se2.57 [+1,0,-1,-2,-3,-4,-5] [0.537,0.250,0.355] 4c +Se_Sb_Cs_Se2.63 [+1,0,-1,-2,-3,-4,-5] [0.328,0.250,0.032] 4c + +Interstitials Guessed Charges Conv. Cell Coords Wyckoff +--------------- --------------------------- ------------------- --------- +Sb_i_C1 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.367,0.043,0.279] 8d +Sb_i_Cs_Sb2.15 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.514,0.250,0.696] 4c +Sb_i_Cs_Sb2.31 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.525,0.250,0.064] 4c +Sb_i_Cs_Sb2.33 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.394,0.250,0.218] 4c +Sb_i_Cs_Sb2.34 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.335,0.250,0.349] 4c +Sb_i_Cs_Sb2.36 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.076,0.250,0.043] 4c +Sb_i_Cs_Sb2.84 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.624,0.250,0.132] 4c +Sb_i_Cs_Se2.36 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.241,0.750,0.112] 4c +Sb_i_Cs_Se2.38 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.293,0.750,0.263] 4c +Se_i_C1 [+4,+3,+2,+1,0,-1,-2] [0.367,0.043,0.279] 8d +Se_i_Cs_Sb2.15 [+4,+3,+2,+1,0,-1,-2] [0.514,0.250,0.696] 4c +Se_i_Cs_Sb2.31 [+4,+3,+2,+1,0,-1,-2] [0.525,0.250,0.064] 4c +Se_i_Cs_Sb2.33 [+4,+3,+2,+1,0,-1,-2] [0.394,0.250,0.218] 4c +Se_i_Cs_Sb2.34 [+4,+3,+2,+1,0,-1,-2] [0.335,0.250,0.349] 4c +Se_i_Cs_Sb2.36 [+4,+3,+2,+1,0,-1,-2] [0.076,0.250,0.043] 4c +Se_i_Cs_Sb2.84 [+4,+3,+2,+1,0,-1,-2] [0.624,0.250,0.132] 4c +Se_i_Cs_Se2.36 [+4,+3,+2,+1,0,-1,-2] [0.241,0.750,0.112] 4c +Se_i_Cs_Se2.38 [+4,+3,+2,+1,0,-1,-2] [0.293,0.750,0.263] 4c +\n""" + "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in " + "the conventional ('conv.') unit cell, which comprises 4 formula unit(s) of Sb2Se3.\n" + ) self.ag2se = Structure.from_file(f"{self.data_dir}/Ag2Se_POSCAR") + self.ag2se_defect_gen_info = ( + """Vacancies Guessed Charges Conv. Cell Coords Wyckoff +-------------- ----------------- ------------------- --------- +v_Ag_C1 [+1,0,-1] [0.103,0.005,0.244] 4e +v_Ag_C2_Ag2.80 [+1,0,-1] [0.391,0.000,0.000] 2a +v_Ag_C2_Ag2.85 [+1,0,-1] [0.615,0.500,0.500] 2b +v_Se [+2,+1,0,-1] [0.294,0.520,0.251] 4e + +Substitutions Guessed Charges Conv. Cell Coords Wyckoff +--------------- --------------------- ------------------- --------- +Ag_Se [+3,+2,+1,0] [0.294,0.520,0.251] 4e +Se_Ag_C1 [+3,+2,+1,0,-1,-2,-3] [0.103,0.005,0.244] 4e +Se_Ag_C2_Ag2.80 [+3,+2,+1,0,-1,-2,-3] [0.391,0.000,0.000] 2a +Se_Ag_C2_Ag2.85 [+3,+2,+1,0,-1,-2,-3] [0.615,0.500,0.500] 2b + +Interstitials Guessed Charges Conv. Cell Coords Wyckoff +--------------- ----------------- ------------------- --------- +Ag_i_C1_Ag2.04 [+2,+1,0] [0.335,0.435,0.002] 4e +Ag_i_C1_Ag2.09 [+2,+1,0] [0.435,0.123,0.251] 4e +Ag_i_C2_Ag2.02 [+2,+1,0] [0.500,0.250,0.319] 2d +Ag_i_C2_Ag2.48 [+2,+1,0] [0.091,0.500,0.500] 2b +Se_i_C1_Ag2.04 [0,-1,-2] [0.335,0.435,0.002] 4e +Se_i_C1_Ag2.09 [0,-1,-2] [0.435,0.123,0.251] 4e +Se_i_C2_Ag2.02 [0,-1,-2] [0.500,0.250,0.319] 2d +Se_i_C2_Ag2.48 [0,-1,-2] [0.091,0.500,0.500] 2b +\n""" + "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in " + "the conventional ('conv.') unit cell, which comprises 4 formula unit(s) of Ag2Se.\n" + ) self.sb2si2te6 = Structure.from_file(f"{self.data_dir}/Sb2Si2Te6_POSCAR") self.sb2si2te6_defect_gen_info = ( @@ -660,18 +726,18 @@ def setUp(self): Interstitials Guessed Charges Conv. Cell Coords Wyckoff --------------- --------------------------- ------------------- --------- -Si_i_C1_Si2.20 [+4,+3,+2,+1,0] [0.179,0.359,0.167] 18f -Si_i_C1_Si2.43 [+4,+3,+2,+1,0] [0.341,0.341,0.458] 18f +Si_i_C1_Si2.21 [+4,+3,+2,+1,0] [0.158,0.359,0.167] 18f +Si_i_C1_Si2.48 [+4,+3,+2,+1,0] [0.348,0.348,0.457] 18f Si_i_C1_Te2.44 [+4,+3,+2,+1,0] [0.336,0.335,0.711] 18f Si_i_C3_Si2.64 [+4,+3,+2,+1,0] [0.000,0.000,0.318] 6c Si_i_C3i_Te2.81 [+4,+3,+2,+1,0] [0.000,0.000,0.000] 3a -Sb_i_C1_Si2.20 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.179,0.359,0.167] 18f -Sb_i_C1_Si2.43 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.341,0.341,0.458] 18f +Sb_i_C1_Si2.21 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.158,0.359,0.167] 18f +Sb_i_C1_Si2.48 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.348,0.348,0.457] 18f Sb_i_C1_Te2.44 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.336,0.335,0.711] 18f Sb_i_C3_Si2.64 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.318] 6c Sb_i_C3i_Te2.81 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.000,0.000,0.000] 3a -Te_i_C1_Si2.20 [+4,+3,+2,+1,0,-1,-2] [0.179,0.359,0.167] 18f -Te_i_C1_Si2.43 [+4,+3,+2,+1,0,-1,-2] [0.341,0.341,0.458] 18f +Te_i_C1_Si2.21 [+4,+3,+2,+1,0,-1,-2] [0.158,0.359,0.167] 18f +Te_i_C1_Si2.48 [+4,+3,+2,+1,0,-1,-2] [0.348,0.348,0.457] 18f Te_i_C1_Te2.44 [+4,+3,+2,+1,0,-1,-2] [0.336,0.335,0.711] 18f Te_i_C3_Si2.64 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.318] 6c Te_i_C3i_Te2.81 [+4,+3,+2,+1,0,-1,-2] [0.000,0.000,0.000] 3a @@ -735,6 +801,7 @@ def _load_and_test_defect_gen_jsons(self, defect_gen): if_present_rm(default_json_filename) def _general_defect_gen_check(self, defect_gen, charge_states_removed=False): + print("Checking general DefectsGenerator attributes") assert self.structure_matcher.fit( defect_gen.primitive_structure * defect_gen.supercell_matrix, defect_gen.bulk_supercell, @@ -755,7 +822,8 @@ def _general_defect_gen_check(self, defect_gen, charge_states_removed=False): defect_entry_name in defect_gen for defect_entry_name in defect_gen.defect_entries # __contains__() ) - # test no unwanted structure reordering + + print("Checking structure (re)ordering") # test no unwanted structure reordering if not np.isclose(abs(np.linalg.det(defect_gen.supercell_matrix)), 1): # if input structure is # unordered/defective and used as supercell, then primitive_structure and bulk_supercell # attributes can be unordered, fine as not directly used for POSCAR file generation @@ -772,6 +840,7 @@ def _general_defect_gen_check(self, defect_gen, charge_states_removed=False): assert np.isclose(defect_gen.min_image_distance, get_min_image_distance(defect_gen.bulk_supercell)) + print("Checking Defect/DefectEntry types") assert all(defect.defect_type == DefectType.Vacancy for defect in defect_gen.defects["vacancies"]) if "substitutions" in defect_gen.defects: assert all( @@ -782,16 +851,17 @@ def _general_defect_gen_check(self, defect_gen, charge_states_removed=False): defect.defect_type == DefectType.Interstitial for defect in defect_gen.defects.get("interstitials", []) ) - assert sum(vacancy.multiplicity for vacancy in defect_gen.defects["vacancies"]) == len( - defect_gen.primitive_structure - ) - assert all( isinstance(defect_entry, DefectEntry) for defect_entry in defect_gen.defect_entries.values() ) + print("Checking vacancy multiplicity") + assert sum(vacancy.multiplicity for vacancy in defect_gen.defects["vacancies"]) == len( + defect_gen.primitive_structure + ) for defect_list in defect_gen.defects.values(): for defect in defect_list: + print(f"Checking {defect.defect_type} {defect.name} attributes") assert isinstance(defect, Defect) assert defect.conv_cell_frac_coords in defect.equiv_conv_cell_frac_coords assert defect.structure == defect_gen.primitive_structure @@ -825,6 +895,7 @@ def _general_defect_gen_check(self, defect_gen, charge_states_removed=False): ) def _check_defect_entry(self, defect_entry, defect_name, defect_gen, charge_states_removed=False): + print(f"Checking DefectEntry {defect_name} attributes") assert defect_entry.name == defect_name assert defect_entry.charge_state == int(defect_name.split("_")[-1]) assert defect_entry.wyckoff @@ -841,87 +912,95 @@ def _check_defect_entry(self, defect_entry, defect_name, defect_gen, charge_stat defect_entry.sc_entry.structure.lattice.matrix, defect_gen.bulk_supercell.lattice.matrix, ) - sga = SpacegroupAnalyzer(defect_gen.structure) - reoriented_conv_structure = swap_axes( - sga.get_conventional_standard_structure(), defect_gen._BilbaoCS_conv_cell_vector_mapping - ) - assert np.array_equal( - defect_entry.conventional_structure.lattice.matrix, - defect_entry.defect.conventional_structure.lattice.matrix, - ) - assert np.allclose( - defect_entry.conventional_structure.lattice.matrix, - reoriented_conv_structure.lattice.matrix, - ) - # test no unwanted structure reordering - for structure in [ - defect_entry.defect_supercell, - defect_entry.bulk_supercell, - defect_entry.sc_entry.structure, - defect_entry.conventional_structure, - ]: - assert len(Poscar(structure).site_symbols) == len( - set(Poscar(structure).site_symbols) - ) # no duplicates - - # get minimum distance of defect_entry.conv_cell_frac_coords to any site in - # defect_entry.conventional_structure - distance_matrix = defect_entry.conventional_structure.lattice.get_all_distances( - defect_entry.conventional_structure.frac_coords, defect_entry.conv_cell_frac_coords - )[:, 0] - min_dist = distance_matrix[distance_matrix > 0.01].min() - if defect_gen.interstitial_gen_kwargs is not False: - assert min_dist > defect_gen.interstitial_gen_kwargs.get( - "min_dist", 0.9 - ) # default min_dist = 0.9 - for conv_cell_frac_coords in defect_entry.equiv_conv_cell_frac_coords: + + # only run more intensive checks on neutral entries, as charged entries are just copies of this + if defect_entry.charge_state == 0: + sga = SpacegroupAnalyzer(defect_gen.structure) + reoriented_conv_structure = swap_axes( + sga.get_conventional_standard_structure(), defect_gen._BilbaoCS_conv_cell_vector_mapping + ) + assert np.array_equal( + defect_entry.conventional_structure.lattice.matrix, + defect_entry.defect.conventional_structure.lattice.matrix, + ) + assert np.allclose( + defect_entry.conventional_structure.lattice.matrix, + reoriented_conv_structure.lattice.matrix, + ) + # test no unwanted structure reordering + for structure in [ + defect_entry.defect_supercell, + defect_entry.bulk_supercell, + defect_entry.sc_entry.structure, + defect_entry.conventional_structure, + ]: + assert len(Poscar(structure).site_symbols) == len( + set(Poscar(structure).site_symbols) + ) # no duplicates + + # get minimum distance of defect_entry.conv_cell_frac_coords to any site in + # defect_entry.conventional_structure distance_matrix = defect_entry.conventional_structure.lattice.get_all_distances( - defect_entry.conventional_structure.frac_coords, conv_cell_frac_coords + defect_entry.conventional_structure.frac_coords, defect_entry.conv_cell_frac_coords )[:, 0] - equiv_min_dist = distance_matrix[distance_matrix > 0.01].min() - assert np.isclose(min_dist, equiv_min_dist, atol=0.01) - - # test equivalent_sites for defects: - assert len(defect_entry.defect.equivalent_sites) == defect_entry.defect.multiplicity - assert defect_entry.defect.site in defect_entry.defect.equivalent_sites - for equiv_site in defect_entry.defect.equivalent_sites: - nearest_atoms = defect_entry.defect.structure.get_sites_in_sphere( - equiv_site.coords, - 5, - ) - nn_distances = np.array([nn.distance_from_point(equiv_site.coords) for nn in nearest_atoms]) - nn_distance = min(nn_distances[nn_distances > 0.01]) # minimum nonzero distance - print(defect_entry.name, equiv_site.coords, nn_distance, min_dist) - assert np.isclose(min_dist, nn_distance, atol=0.01) # same min_dist as from - # conv_cell_frac_coords testing above + min_dist = distance_matrix[distance_matrix > 0.01].min() + if defect_gen.interstitial_gen_kwargs is not False: + assert min_dist > defect_gen.interstitial_gen_kwargs.get( + "min_dist", 0.9 + ) # default min_dist = 0.9 + for conv_cell_frac_coords in defect_entry.equiv_conv_cell_frac_coords: + distance_matrix = defect_entry.conventional_structure.lattice.get_all_distances( + defect_entry.conventional_structure.frac_coords, conv_cell_frac_coords + )[:, 0] + equiv_min_dist = distance_matrix[distance_matrix > 0.01].min() + assert np.isclose(min_dist, equiv_min_dist, atol=0.01) + + # test equivalent_sites for defects: + assert len(defect_entry.defect.equivalent_sites) == defect_entry.defect.multiplicity + assert defect_entry.defect.site in defect_entry.defect.equivalent_sites + for equiv_site in defect_entry.defect.equivalent_sites: + nearest_atoms = defect_entry.defect.structure.get_sites_in_sphere( + equiv_site.coords, + 5, + ) + nn_distances = np.array( + [nn.distance_from_point(equiv_site.coords) for nn in nearest_atoms] + ) + nn_distance = min(nn_distances[nn_distances > 0.01]) # minimum nonzero distance + print(defect_entry.name, equiv_site.coords, nn_distance, min_dist) + assert np.isclose(min_dist, nn_distance, atol=0.01) # same min_dist as from + # conv_cell_frac_coords testing above - assert np.allclose( - defect_entry.bulk_supercell.lattice.matrix, defect_gen.bulk_supercell.lattice.matrix - ) - num_prim_cells_in_conv_cell = len(defect_entry.conventional_structure) / len( - defect_entry.defect.structure - ) - assert defect_entry.defect.multiplicity * num_prim_cells_in_conv_cell == int( - defect_entry.wyckoff[:-1] - ) - assert len(defect_entry.equiv_conv_cell_frac_coords) == int(defect_entry.wyckoff[:-1]) - assert len(defect_entry.defect.equiv_conv_cell_frac_coords) == int(defect_entry.wyckoff[:-1]) - assert any( - np.array_equal(conv_coords_array, defect_entry.conv_cell_frac_coords) - for conv_coords_array in defect_entry.equiv_conv_cell_frac_coords - ) - for equiv_conv_cell_frac_coords in defect_entry.equiv_conv_cell_frac_coords: + assert np.allclose( + defect_entry.bulk_supercell.lattice.matrix, defect_gen.bulk_supercell.lattice.matrix + ) + num_prim_cells_in_conv_cell = len(defect_entry.conventional_structure) / len( + defect_entry.defect.structure + ) + assert defect_entry.defect.multiplicity * num_prim_cells_in_conv_cell == int( + defect_entry.wyckoff[:-1] + ) + assert len(defect_entry.equiv_conv_cell_frac_coords) == int(defect_entry.wyckoff[:-1]) + assert len(defect_entry.defect.equiv_conv_cell_frac_coords) == int(defect_entry.wyckoff[:-1]) assert any( - np.array_equal(equiv_conv_cell_frac_coords, x) - for x in defect_entry.defect.equiv_conv_cell_frac_coords + np.array_equal(conv_coords_array, defect_entry.conv_cell_frac_coords) + for conv_coords_array in defect_entry.equiv_conv_cell_frac_coords ) - assert len(defect_entry.equivalent_supercell_sites) == int(defect_entry.wyckoff[:-1]) * ( - len(defect_entry.bulk_supercell) / len(defect_entry.conventional_structure) - ) - assert defect_entry.defect_supercell_site in defect_entry.equivalent_supercell_sites - assert np.isclose( - defect_entry.defect_supercell_site.frac_coords, defect_entry.sc_defect_frac_coords, atol=1e-5 - ).all() + for equiv_conv_cell_frac_coords in defect_entry.equiv_conv_cell_frac_coords: + assert any( + np.array_equal(equiv_conv_cell_frac_coords, x) + for x in defect_entry.defect.equiv_conv_cell_frac_coords + ) + assert len(defect_entry.equivalent_supercell_sites) == int(defect_entry.wyckoff[:-1]) * ( + len(defect_entry.bulk_supercell) / len(defect_entry.conventional_structure) + ) + assert defect_entry.defect_supercell_site in defect_entry.equivalent_supercell_sites + assert np.isclose( + defect_entry.defect_supercell_site.frac_coords, + defect_entry.sc_defect_frac_coords, + atol=1e-5, + ).all() + assert defect_entry.bulk_entry is None # Sb2Se3 and Ag2Se are the 2 test cases included so far where the lattice vectors swap, @@ -1012,6 +1091,7 @@ def _random_equiv_supercell_sites_check(self, defect_entry): ) def _check_editing_defect_gen(self, random_defect_entry_name, defect_gen): + print(f"Checking editing DefectsGenerator, using {random_defect_entry_name}") assert ( defect_gen[random_defect_entry_name] == defect_gen.defect_entries[random_defect_entry_name] ) # __getitem__() @@ -1103,7 +1183,7 @@ def _check_Se_Te(extrinsic_CdTe_defect_gen, element="Se", idx=-1): assert extrinsic_CdTe_defect_gen.defects["substitutions"][idx].oxi_state == 0 assert extrinsic_CdTe_defect_gen.defects["substitutions"][idx].multiplicity == 1 assert extrinsic_CdTe_defect_gen.defects["substitutions"][idx].defect_site == PeriodicSite( - "Te", [0.25, 0.25, 0.25], extrinsic_CdTe_defect_gen.primitive_structure.lattice + "Te2-", [0.25, 0.25, 0.25], extrinsic_CdTe_defect_gen.primitive_structure.lattice ) assert str(extrinsic_CdTe_defect_gen.defects["substitutions"][idx].site.specie) == f"{element}" assert np.isclose( @@ -1804,10 +1884,10 @@ def CdTe_defect_gen_check(self, CdTe_defect_gen, generate_supercell=True): assert CdTe_defect_gen.defects["vacancies"][0].oxi_state == -2 assert CdTe_defect_gen.defects["vacancies"][0].multiplicity == 1 assert CdTe_defect_gen.defects["vacancies"][0].defect_site == PeriodicSite( - "Cd", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice + "Cd2+", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice ) assert CdTe_defect_gen.defects["vacancies"][0].site == PeriodicSite( - "Cd", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice + "Cd2+", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice ) assert ( len(CdTe_defect_gen.defects["vacancies"][0].equiv_conv_cell_frac_coords) == 4 @@ -2018,10 +2098,10 @@ def CdTe_defect_gen_check(self, CdTe_defect_gen, generate_supercell=True): assert CdTe_defect_gen.defect_entries["v_Cd_0"].wyckoff == "4a" assert CdTe_defect_gen.defect_entries["v_Cd_0"].defect.defect_type == DefectType.Vacancy assert CdTe_defect_gen.defect_entries["v_Cd_0"].defect.defect_site == PeriodicSite( - "Cd", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice + "Cd2+", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice ) assert CdTe_defect_gen.defect_entries["v_Cd_0"].defect.site == PeriodicSite( - "Cd", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice + "Cd2+", [0, 0, 0], CdTe_defect_gen.primitive_structure.lattice ) assert np.allclose( CdTe_defect_gen.defect_entries["v_Cd_0"].conv_cell_frac_coords, @@ -2432,7 +2512,7 @@ def lmno_defect_gen_check(self, lmno_defect_gen, generate_supercell=True): lmno_defect_gen.defect_entries["Ni_i_C1_O1.78_+2"].defect.multiplicity == 24 ) # prim = conv structure in LMNO sc_frac_coords = np.array( - [0.378186, 0.622212, 0.61125] if generate_supercell else [0.500397, 0.510568, 0.766541] + [0.375325, 0.391795, 0.616475] if generate_supercell else [0.23288, 0.4918, 0.49173] ) assert np.allclose( lmno_defect_gen.defect_entries["Ni_i_C1_O1.78_+2"].sc_defect_frac_coords, @@ -2444,12 +2524,12 @@ def lmno_defect_gen_check(self, lmno_defect_gen, generate_supercell=True): ) assert np.allclose( lmno_defect_gen.defect_entries["Ni_i_C1_O1.78_+2"].conv_cell_frac_coords, - np.array([0.017, 0.261, 0.250]), + np.array([0.233, 0.492, 0.492]), atol=1e-3, ) assert np.allclose( lmno_defect_gen.defect_entries["Ni_i_C1_O1.78_+2"].defect.site.frac_coords, - np.array([0.017, 0.261, 0.250]), + np.array([0.233, 0.492, 0.492]), atol=1e-3, ) @@ -2872,7 +2952,7 @@ def cd_i_CdTe_supercell_defect_gen_check(self, cd_i_defect_gen): assert len(cd_i_defect_gen.defects) == 3 # vacancies, substitutions, interstitials assert len(cd_i_defect_gen.defects["vacancies"]) == 21 assert len(cd_i_defect_gen.defects["substitutions"]) == 21 - assert len(cd_i_defect_gen.defects["interstitials"]) == 90 + assert len(cd_i_defect_gen.defects["interstitials"]) == 86 # explicitly test some relevant defect attributes assert cd_i_defect_gen.defects["vacancies"][1].name == "v_Cd" @@ -3089,46 +3169,7 @@ def test_charge_state_guessing(self): # incorrectly determined if the pymatgen/spglib lattice vectors aren't swapped to match the BCS # convention, so this is a good check: - assert ( - ( - """Vacancies Guessed Charges Conv. Cell Coords Wyckoff --------------- ----------------- ------------------- --------- -v_Sb_Cs_Se2.57 [+1,0,-1,-2,-3] [0.537,0.250,0.355] 4c -v_Sb_Cs_Se2.63 [+1,0,-1,-2,-3] [0.328,0.250,0.032] 4c -v_Se_Cs_Sb2.57 [+2,+1,0,-1] [0.628,0.250,0.553] 4c -v_Se_Cs_Sb2.63 [+2,+1,0,-1] [0.192,0.250,0.210] 4c -v_Se_Cs_Sb2.65 [+2,+1,0,-1] [0.445,0.750,0.128] 4c - -Substitutions Guessed Charges Conv. Cell Coords Wyckoff ---------------- --------------------------- ------------------- --------- -Sb_Se_Cs_Sb2.57 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.628,0.250,0.553] 4c -Sb_Se_Cs_Sb2.63 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.192,0.250,0.210] 4c -Sb_Se_Cs_Sb2.65 [+7,+6,+5,+4,+3,+2,+1,0,-1] [0.445,0.750,0.128] 4c -Se_Sb_Cs_Se2.57 [+1,0,-1,-2,-3,-4,-5] [0.537,0.250,0.355] 4c -Se_Sb_Cs_Se2.63 [+1,0,-1,-2,-3,-4,-5] [0.328,0.250,0.032] 4c - -Interstitials Guessed Charges Conv. Cell Coords Wyckoff --------------------- --------------------------- ------------------- --------- -Sb_i_C1 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.367,0.043,0.279] 8d -Sb_i_Cs_Sb2.14 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.503,0.250,0.700] 4c -Sb_i_Cs_Sb2.34 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.335,0.250,0.349] 4c -Sb_i_Cs_Sb2.83 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.627,0.250,0.132] 4c -Sb_i_Cs_Se2.32Sb2.33 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.393,0.250,0.217] 4c -Sb_i_Cs_Se2.32Sb2.40 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.074,0.250,0.035] 4c -Sb_i_Cs_Se2.38 [+5,+4,+3,+2,+1,0,-1,-2,-3] [0.293,0.750,0.263] 4c -Se_i_C1 [+4,+3,+2,+1,0,-1,-2] [0.367,0.043,0.279] 8d -Se_i_Cs_Sb2.14 [+4,+3,+2,+1,0,-1,-2] [0.503,0.250,0.700] 4c -Se_i_Cs_Sb2.34 [+4,+3,+2,+1,0,-1,-2] [0.335,0.250,0.349] 4c -Se_i_Cs_Sb2.83 [+4,+3,+2,+1,0,-1,-2] [0.627,0.250,0.132] 4c -Se_i_Cs_Se2.32Sb2.33 [+4,+3,+2,+1,0,-1,-2] [0.393,0.250,0.217] 4c -Se_i_Cs_Se2.32Sb2.40 [+4,+3,+2,+1,0,-1,-2] [0.074,0.250,0.035] 4c -Se_i_Cs_Se2.38 [+4,+3,+2,+1,0,-1,-2] [0.293,0.750,0.263] 4c -\n""" - "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in " - "the conventional ('conv.') unit cell, which comprises 4 formula unit(s) of Sb2Se3.\n" - ) - in output - ) + assert self.sb2se3_defect_gen_info in output # test get_defect_name_from_entry relaxed/unrelaxed warnings: with warnings.catch_warnings(record=True) as w: @@ -3157,39 +3198,7 @@ def test_lattice_vector_swapping(self): ag2se_defect_gen, output = self._generate_and_test_no_warnings(self.ag2se) self._general_defect_gen_check(ag2se_defect_gen) - assert ( - ( - """Vacancies Guessed Charges Conv. Cell Coords Wyckoff --------------- ----------------- ------------------- --------- -v_Ag_C1 [+1,0,-1] [0.103,0.005,0.244] 4e -v_Ag_C2_Ag2.80 [+1,0,-1] [0.391,0.000,0.000] 2a -v_Ag_C2_Ag2.85 [+1,0,-1] [0.615,0.500,0.500] 2b -v_Se [+2,+1,0,-1] [0.294,0.520,0.251] 4e - -Substitutions Guessed Charges Conv. Cell Coords Wyckoff ---------------- --------------------- ------------------- --------- -Ag_Se [+3,+2,+1,0] [0.294,0.520,0.251] 4e -Se_Ag_C1 [+3,+2,+1,0,-1,-2,-3] [0.103,0.005,0.244] 4e -Se_Ag_C2_Ag2.80 [+3,+2,+1,0,-1,-2,-3] [0.391,0.000,0.000] 2a -Se_Ag_C2_Ag2.85 [+3,+2,+1,0,-1,-2,-3] [0.615,0.500,0.500] 2b - -Interstitials Guessed Charges Conv. Cell Coords Wyckoff ---------------- ----------------- ------------------- --------- -Ag_i_C1_Ag2.04 [+2,+1,0] [0.335,0.435,0.002] 4e -Ag_i_C1_Ag2.05 [+2,+1,0] [0.405,0.109,0.250] 4e -Ag_i_C2_Ag2.02 [+2,+1,0] [0.500,0.250,0.319] 2d -Ag_i_C2_Ag2.48 [+2,+1,0] [0.091,0.500,0.500] 2b -Se_i_C1_Ag2.04 [0,-1,-2] [0.335,0.435,0.002] 4e -Se_i_C1_Ag2.05 [0,-1,-2] [0.405,0.109,0.250] 4e -Se_i_C2_Ag2.02 [0,-1,-2] [0.500,0.250,0.319] 2d -Se_i_C2_Ag2.48 [0,-1,-2] [0.091,0.500,0.500] 2b -\n""" - "The number in the Wyckoff label is the site multiplicity/degeneracy of that defect in " - "the conventional ('conv.') unit cell, which comprises 4 formula unit(s) of Ag2Se.\n" - ) - in output - ) - + assert self.ag2se_defect_gen_info in output assert ag2se_defect_gen._BilbaoCS_conv_cell_vector_mapping == [1, 0, 2] def test_sb2si2te6(self): @@ -3469,9 +3478,7 @@ def test_unrecognised_gen_kwargs(self): with pytest.raises(TypeError) as exc: DefectsGenerator(self.prim_cdte, interstitial_gen_kwargs={"unrecognised_kwarg": 1}) - assert ( # TopographyAnalyzer.__init__() but only __init__() shown in python 3.9 - "__init__() got an unexpected keyword argument 'unrecognised_kwarg'" in str(exc.value) - ) + assert "interstitial_gen_kwargs must only contain the following keys" in str(exc.value) with pytest.raises(TypeError) as exc: DefectsGenerator(self.prim_cdte, charge_state_gen_kwargs={"unrecognised_kwarg": 1})