diff --git a/electronicparsers/fhiaims/metainfo/fhi_aims.py b/electronicparsers/fhiaims/metainfo/fhi_aims.py index 72034998..3749136a 100644 --- a/electronicparsers/fhiaims/metainfo/fhi_aims.py +++ b/electronicparsers/fhiaims/metainfo/fhi_aims.py @@ -88,6 +88,30 @@ class x_fhi_aims_section_controlIn_basis_func(MSection): - ''') + x_fhi_aims_controlIn_basis_func_gauss_l = Quantity( + type=np.dtype(np.int32), + shape=[], + description=''' + "L is an integer number, specifying the angular momentum" + - Manual FHI-aims v201716_2 + ''') + + x_fhi_aims_controlIn_basis_func_gauss_alphas = Quantity( + type=np.dtype(np.float64), + shape=['*'], + unit='1 / meter ** 2', + description=''' + "The exponent defining a (primitive) Gaussian function" + - Manual FHI-aims v201716_2 + ''') + + x_fhi_aims_controlIn_basis_func_gauss_coeffs = Quantity( + type=np.dtype(np.float64), + shape=['*'], + description=''' + Weights in linearly composed Gaussian functions. + ''') + x_fhi_aims_controlIn_basis_func_type = Quantity( type=str, shape=[], @@ -145,20 +169,6 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): angular leven for the hartreee part ''') - x_fhi_aims_controlIn_mass = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - mass of the nucleus in atomic mass units - ''') - - x_fhi_aims_controlIn_nucleus = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - charge of the nucleus - ''') - x_fhi_aims_controlIn_outer_grid = Quantity( type=np.dtype(np.float64), shape=[], @@ -180,84 +190,22 @@ class x_fhi_aims_section_controlIn_basis_set(MSection): radial multiplier ''') - x_fhi_aims_controlIn_species_name = Quantity( - type=str, - shape=[], - description=''' - - - ''') - - x_fhi_aims_section_controlIn_basis_func = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_controlIn_basis_func'), - repeats=True) - - -class x_fhi_aims_section_controlInOut_atom_species(MSection): - ''' - - - ''' - - m_def = Section(validate=False) - - 
x_fhi_aims_controlInOut_pure_gaussian = Quantity( + x_fhi_aims_controlIn_hash = Quantity( type=str, shape=[], description=''' - ''') - x_fhi_aims_controlInOut_species_charge = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='coulomb', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot_scale = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot_width = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='meter', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_cut_pot = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='meter', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_mass = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='kilogram', - description=''' - - - ''') - - x_fhi_aims_controlInOut_species_name = Quantity( + x_fhi_aims_controlIn_species_name = Quantity( type=str, shape=[], description=''' - ''') - x_fhi_aims_section_controlInOut_basis_func = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_controlInOut_basis_func'), - repeats=True) - - x_fhi_aims_section_vdW_TS = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_vdW_TS'), + x_fhi_aims_section_controlIn_basis_func = SubSection( + sub_section=SectionProxy('x_fhi_aims_section_controlIn_basis_func'), repeats=True) @@ -275,49 +223,6 @@ class x_fhi_aims_section_controlInOut_basis_func(MSection): - ''') - x_fhi_aims_controlInOut_basis_func_gauss_alpha = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='1 / meter ** 2', - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_l = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_N = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_gauss_weight = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - 
''') - - x_fhi_aims_controlInOut_basis_func_l = Quantity( - type=str, - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_n = Quantity( - type=np.dtype(np.int32), - shape=[], - description=''' - - - ''') - x_fhi_aims_controlInOut_basis_func_occ = Quantity( type=np.dtype(np.float64), shape=[], @@ -325,28 +230,6 @@ class x_fhi_aims_section_controlInOut_basis_func(MSection): - ''') - x_fhi_aims_controlInOut_basis_func_primitive_gauss_alpha = Quantity( - type=np.dtype(np.float64), - shape=[], - unit='1 / meter ** 2', - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_radius = Quantity( - type=np.dtype(np.float64), - shape=[], - description=''' - - - ''') - - x_fhi_aims_controlInOut_basis_func_type = Quantity( - type=str, - shape=[], - description=''' - - - ''') - class x_fhi_aims_section_eigenvalues_group_ZORA(MSection): ''' @@ -1386,10 +1269,46 @@ class AtomParameters(simulation.method.AtomParameters): m_def = Section(validate=False, extends_base_section=True) - x_fhi_aims_section_controlInOut_atom_species = SubSection( - sub_section=SectionProxy('x_fhi_aims_section_controlInOut_atom_species'), + x_fhi_aims_controlInOut_pure_gaussian = Quantity( + type=str, + shape=[], + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot_scale = Quantity( + type=np.dtype(np.float64), + shape=[], + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot_width = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='meter', + description=''' + - + ''') + + x_fhi_aims_controlInOut_species_cut_pot = Quantity( + type=np.dtype(np.float64), + shape=[], + unit='meter', + description=''' + - + ''') + + x_fhi_aims_section_vdW_TS = SubSection( + sub_section=SectionProxy('x_fhi_aims_section_vdW_TS'), repeats=True) + x_fhi_aims_section_controlIn_basis_set = Quantity( + type=Reference(SectionProxy('x_fhi_aims_section_controlIn_basis_set')), + shape=[], + description='''-''', + ) + class 
def str_to_species(val_in):
    '''
    Convert the text of one echoed species block into a keyword -> values mapping.

    The last line of `val_in` is discarded. The remaining lines are walked from
    the bottom up: text after a `#` is dropped, `.d` is replaced by `.e`, and
    lines that do not start with a letter are glued (in file order) onto the
    closest preceding line that does. The walk stops at the first line
    starting with `species` that is met on the way up.

    Returns a dict in which every keyword maps to a list with one item per
    occurrence: a single string when the keyword has exactly one argument,
    otherwise the list of its argument strings.
    '''
    raw_lines = val_in.strip().splitlines()[:-1]

    # collected bottom-up, i.e. in reverse file order
    collected = []
    pending = ''
    for raw in reversed(raw_lines):
        payload = raw.strip().split('#')[0].replace('.d', '.e')
        # the separating blank keeps `pending` non-empty, so indexing [0] is always safe
        pending = f'{payload} {pending}'
        if not pending[0].isalpha():
            # continuation (or blank / comment-only) line: keep it for the keyword line above
            continue
        collected.append(pending.split())
        if pending.startswith('species'):
            break
        pending = ''

    species = {}
    for tokens in reversed(collected):
        keyword, arguments = tokens[0], tokens[1:]
        value = arguments[0] if len(arguments) == 1 else arguments
        species.setdefault(keyword, []).append(value)
    return species
rf'{re_n} *verbatim_writeout\s*([\w]+)', repeats=False), Quantity( - 'xc', - rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), + 'xc', rf'{re_n} *xc\s*([\w\. \-\+]+)', repeats=False), Quantity( - 'species', rf'{re_n} *(species\s*[A-Z][a-z]?[\s\S]+?)' - r'(?:species\s*[A-Z][a-z]?|Completed|\-{10})', - str_operation=str_to_species, repeats=False)] + 'species', rf'{re_n} *(species\s+[A-Z][a-z]?[\s\S]+?)' + r'(FHI-aims code project|\-{10})', + str_operation=str_to_species, repeats=True,), + ] class FHIAimsOutParser(TextParser): @@ -794,7 +794,7 @@ def __init__(self): 'Hybrid M11 gradient-corrected functionals': [{'name': 'MGGA_C_M11'}, {'name': 'HYB_MGGA_X_M11'}]} # TODO update metainfo to reflect all energy corrections - # why section_vdW_TS under x_fhi_aims_section_controlInOut_atom_species? + # why section_vdW_TS under atom_parameter? self._energy_map = { 'Total energy uncorrected': 'energy_total', 'Total energy corrected': 'energy_total_t0', @@ -1294,7 +1294,6 @@ def parse_scf(iteration): def parse_vdW(section): # these are not actually vdW outputs but vdW control parameters but are # printed within the calculation section. 
- # TODO why is x_fhi_aims_section_vdW_TS under x_fhi_aims_section_controlInOut_atom_species # we would then have to split the vdW parameters by species atoms = section.get('vdW_TS', {}).get('atom_hirshfeld', []) if not atoms: @@ -1307,8 +1306,7 @@ def parse_vdW(section): for sec in sec_atom_type: for atom in atoms: if sec.label == atom['atom']: - sec_vdW_ts = sec.x_fhi_aims_section_controlInOut_atom_species[-1].m_create( - x_fhi_aims_section_vdW_TS) + sec_vdW_ts = sec.m_create(x_fhi_aims_section_vdW_TS) for key, val in atom.items(): metainfo_name = self._property_map.get(key, None) if metainfo_name is None: @@ -1549,20 +1547,32 @@ def parse_basis_set(species): elif key == 'division': pass elif key in basis_funcs: - for i in range(len(val)): + for v in val: sec_basis_func = sec_basis_set.m_create( x_fhi_aims_section_controlIn_basis_func) sec_basis_func.x_fhi_aims_controlIn_basis_func_type = key - sec_basis_func.x_fhi_aims_controlIn_basis_func_n = int(val[i][0]) - sec_basis_func.x_fhi_aims_controlIn_basis_func_l = str(val[i][1]) - if len(val[i]) == 3 and hasattr(val[i][2], 'real'): - sec_basis_func.x_fhi_aims_controlIn_basis_func_radius = val[i][2] + if key == 'gaussian': + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_l = int(v[0]) + gauss_alphas, gauss_coeffs = [], [] + for gaussian_index, gaussian_extra in enumerate(v[2:]): + if gaussian_index % 2: + gauss_coeffs.append(float(gaussian_extra)) + else: + gauss_alphas.append(float(gaussian_extra)) + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_alphas = np.array(gauss_alphas) / ureg.bohr ** 2 + if gauss_coeffs: + sec_basis_func.x_fhi_aims_controlIn_basis_func_gauss_coeffs = gauss_coeffs + else: + sec_basis_func.x_fhi_aims_controlIn_basis_func_n = int(v[0]) + sec_basis_func.x_fhi_aims_controlIn_basis_func_l = str(v[1]) + if len(v) == 3 and hasattr(v[2], 'real'): + sec_basis_func.x_fhi_aims_controlIn_basis_func_radius = v[2] elif key in ['cut_pot', 'radial_base']: setattr(sec_basis_set, 
'x_fhi_aims_controlIn_%s' % key, np.array( val[0], dtype=float)) else: try: - setattr(sec_basis_set, 'x_fhi_aims_controlIn_%s' % key, val[0]) + setattr(sec_basis_set, 'x_fhi_aims_controlIn_%s' % key, v[0]) except Exception: self.logger.warning('Error setting controlIn metainfo.', details={key: key}) @@ -1572,6 +1582,9 @@ def parse_basis_set(species): sec_basis_set.x_fhi_aims_controlIn_number_of_basis_func = len(division) sec_basis_set.x_fhi_aims_controlIn_division = division + # store hash + sec_basis_set.x_fhi_aims_controlIn_hash = hash_section([sec_basis_set], [True]) + def _get_elemental_tier( basis_settings: x_fhi_aims_section_controlIn_basis_set, reference: dict = self._native_tier_references) -> tuple[Any, Any]: @@ -1746,26 +1759,21 @@ def parse_topology(self): def parse_atom_type(species): sec_atom_type = sec_method.m_create(AtomParameters) - sec_atom_species = sec_atom_type.m_create( - x_fhi_aims_section_controlInOut_atom_species) + param_index = len(sec_method.atom_parameters) - 1 + sec_atom_type.x_fhi_aims_section_controlIn_basis_set = sec_method.x_fhi_aims_section_controlIn_basis_set[param_index] for key, val in species.items(): if key == 'nuclear charge': - charge = val[0] * ureg.elementary_charge - sec_atom_type.charge = charge - sec_atom_species.x_fhi_aims_controlInOut_species_charge = charge + sec_atom_type.charge = val[0] * ureg.elementary_charge elif key == 'atomic mass': - mass = val[0][0] * ureg.amu - sec_atom_type.mass = mass - sec_atom_species.x_fhi_aims_controlInOut_species_mass = mass + sec_atom_type.mass = val[0][0] * ureg.amu elif key == 'species': sec_atom_type.label = val - sec_atom_species.x_fhi_aims_controlInOut_species_name = val elif 'request to include pure gaussian fns' in key: - sec_atom_species.x_fhi_aims_controlInOut_pure_gaussian = val[0] + sec_atom_type.x_fhi_aims_controlInOut_pure_gaussian = val[0] elif 'cutoff potl' in key: - sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot = val[0][0] * ureg.angstrom - 
sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot_width = val[0][1] * ureg.angstrom - sec_atom_species.x_fhi_aims_controlInOut_species_cut_pot_scale = val[0][2] + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot = val[0][0] * ureg.angstrom + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot_width = val[0][1] * ureg.angstrom + sec_atom_type.x_fhi_aims_controlInOut_species_cut_pot_scale = val[0][2] elif "request for '+U'" in key: sec_hubbard = sec_atom_type.m_create(HubbardKanamoriModel) sec_hubbard.orbital = f'{val[0][0]}{val[0][1]}' @@ -1773,52 +1781,11 @@ def parse_atom_type(species): sec_hubbard.double_counting_correction = 'Dudarev' sec_hubbard.x_fhi_aims_projection_type = 'Mulliken (dual)' sec_hubbard.x_fhi_aims_petukhov_mixing_factor = self.out_parser.get('petukhov') - elif 'free-atom' in key or 'free-ion' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = ' '.join(key.split()[:-1]) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_occ = val[i][2] - elif 'hydrogenic' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = ' '.join(key.split()[:-1]) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_eff_charge = val[i][2] - elif 'ionic' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = 'ionic basis' - sec_basis_func.x_fhi_aims_controlInOut_basis_func_n = val[i][0] - 
sec_basis_func.x_fhi_aims_controlInOut_basis_func_l = val[i][1] - elif 'basis function' in key: - for i in range(len(val)): - sec_basis_func = sec_atom_species.m_create( - x_fhi_aims_section_controlInOut_basis_func) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_type = key.split( - 'basis')[0].strip() - if val[i][0] == 'L': - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_l = val[i][2] - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_N = val[i][3] - alpha = [val[i][j + 2] for j in range(len(val[i])) if val[i][j] == 'alpha'] - weight = [val[i][j + 2] for j in range(len(val[i])) if val[i][j] == 'weight'] - alpha = np.array(alpha) * (1 / ureg.angstrom ** 2) - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_alpha = alpha - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_weight = weight - elif len(val[i]) == 2: - sec_basis_func.x_fhi_aims_controlInOut_basis_func_gauss_l = val[i][0] - alpha = np.array(val[i][1]) / ureg.angstrom ** 2 - sec_basis_func.x_fhi_aims_controlInOut_basis_func_primitive_gauss_alpha = alpha + # From legacy versions we know that 'free-atom' or 'free-ion' are connected to 'occ' + # and 'hydrogenic' to 'eff_charge'. Nothing for 'ionic' # add inout parameters read from main output - # species - species = self.out_parser.get('control_inout', {}).get('species') - if species is not None: + if (species := self.out_parser.get('control_inout', {}).get('species')) is not None: for specie in species: parse_atom_type(specie) diff --git a/electronicparsers/utils/__init__.py b/electronicparsers/utils/__init__.py index ebeeb1e3..ee10a158 100644 --- a/electronicparsers/utils/__init__.py +++ b/electronicparsers/utils/__init__.py @@ -17,5 +17,5 @@ # limitations under the License. 
def hash_section(
    sections: Union[MSection, list[MSection]],
    subsections: Union[bool, list[bool]],
    **kwargs,
) -> str:
    '''
    Build a single hash from (filtered) dictionary dumps of one or more sections.

    Each section is serialized with `m_to_dict()`, its top-level keys are
    filtered, and the resulting dicts are passed together to `hash`.

    There are two modes determining which keys survive the filter:
    `exclude` keeps every key that is not listed in `quantities`,
    `include` keeps only the keys listed in `quantities`.
    The key `m_def` is kept in either mode whenever it is present.

    `sections`: a section or list of sections to be hashed together
    `subsections`: a bool or list of bools, one per section; when False, keys whose
        attribute on the section is an `MSection` or `MSubSectionList` are skipped
    `mode`: str, either `include` or `exclude` (default)
    `quantities`: list of str, quantities to be included or excluded

    Raises `ValueError` when `sections` and `subsections` differ in length.
    '''
    sections = [sections] if isinstance(sections, MSection) else list(sections)
    subsections = [subsections] if isinstance(subsections, bool) else list(subsections)
    mode: str = kwargs.get('mode', 'exclude')
    quantities: list[str] = kwargs.get('quantities', [])

    # zip() never complains about unequal lengths, it silently stops at the
    # shorter input; check explicitly so no section is dropped from the hash
    if len(sections) != len(subsections):
        raise ValueError(
            f'sections:{sections} and subsections:{subsections} '
            'must be of same length.'
        )

    to_compare: list[dict] = []
    for section, include_subsections in zip(sections, subsections):
        filtered = {}
        for key, val in section.m_to_dict().items():
            if not include_subsections and isinstance(
                    getattr(section, key), (MSection, MSubSectionList)):
                continue
            if (
                key == 'm_def'
                or (mode == 'exclude' and key not in quantities)
                or (mode == 'include' and key in quantities)
            ):
                filtered[key] = val
        to_compare.append(filtered)

    # one positional argument per section
    return hash(*to_compare)