Skip to content

Commit

Permalink
Update frequency parsing in Gaussian (#232)
Browse files Browse the repository at this point in the history
* Fix hyphen elements among frequency list

* Update frequencies and reduced mass regex to capture only the old format

* Add note for future devs

* Add dynamic extraction of units

* Add units to Gaussian schema

* Remove double Gaussian frequency quantities

* Add IR intesities and harmonic force constants

---------

Co-authored-by: [email protected] <[email protected]>
Co-authored-by: Nathan Daelman <ndaelman.physik.hu-berlin.de>
  • Loading branch information
ndaelman-hu and [email protected] authored Jun 21, 2024
1 parent b3c3d74 commit 928e933
Show file tree
Hide file tree
Showing 2 changed files with 98 additions and 35 deletions.
42 changes: 19 additions & 23 deletions electronicparsers/gaussian/metainfo/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -603,51 +603,47 @@ class x_gaussian_section_frequencies(MSection):
validate=False,
)

x_gaussian_frequency_values = Quantity(
type=str,
shape=['number_of_frequency_rows'],
description="""
values of frequencies, in cm-1
""",
)

x_gaussian_frequencies = Quantity(
type=np.float64,
unit='1/m',
shape=['number_of_frequencies'],
description="""
values of frequencies, in cm-1
values of frequencies
""",
)
) # only store the '--' header, not '---'

x_gaussian_reduced_masses = Quantity(
x_gaussian_red_masses = Quantity(
type=np.float64,
shape=['number_of_reduced_masses_rows'],
unit='kg',
shape=['number_of_frequencies'],
description="""
values of normal mode reduced masses
""",
)
) # only store the '--' header, not '---'

x_gaussian_red_masses = Quantity(
x_gaussian_normal_mode_values = Quantity(
type=np.float64,
shape=['number_of_frequencies'],
shape=['number_of_frequencies', 'number_of_atoms', 3],
description="""
values of normal mode reduced masses
normal mode vectors
""",
)

x_gaussian_normal_modes = Quantity(
type=str,
shape=['number_of_normal_modes_rows'],
x_gaussian_harmonic_force_constants = Quantity(
type=np.float64,
unit='newton/meter',
shape=['number_of_frequencies'],
description="""
normal mode vectors
values of harmonic force constants
""",
)

x_gaussian_normal_mode_values = Quantity(
x_gaussian_ir_intensities = Quantity(
type=np.float64,
shape=['number_of_frequencies', 'number_of_atoms', 3],
unit='meter/mol',
shape=['number_of_frequencies'],
description="""
normal mode vectors
infra-red intensities, integrated over their path length
""",
)

Expand Down
91 changes: 79 additions & 12 deletions electronicparsers/gaussian/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,25 @@ def str_to_force_constants(val_in):
fc = fc + fc.T - np.diag(fc.diagonal())
return fc

def str_to_units(unit: str) -> ureg.Unit:
"""Map native Gaussian units to pint units.
Assumes lower case string input.""" # TODO handle compound units recursively
unit_map: dict[str, ureg.Unit] = {
'cm**-1': ureg.cm_1,
'ghz': ureg.gigahertz,
'kcal/mol': ureg.kilocalorie / ureg.mole,
'kj/mol': ureg.kilojoule / ureg.mole,
'j': ureg.joule,
'amu': ureg.amu,
'km/mol': ureg.kilometer / ureg.mole,
'mDyne/A': ureg.millidyne / ureg.angstrom,
}
unit = unit.lower()
try:
return unit_map[unit]
except KeyError:
raise ValueError(f'Unknown unit {unit}')

orientation_quantities = [
Quantity(
'standard_orientation',
Expand Down Expand Up @@ -391,14 +410,49 @@ def str_to_force_constants(val_in):
unit='debye * angstrom**3',
),
Quantity(
'frequencies', r'\n *Frequencies \-\-\s*(.+)', dtype=float, repeats=True
'frequency_unit',
r'[Hh]armonic frequencies \((\S+)\)',
str_operation=str_to_units,
),
Quantity(
'reduced_mass_unit',
r'reduced masses \((\S+)\)',
str_operation=str_to_units,
),
Quantity(
'harmonic_force_constant_unit',
r'force constants \((\S+)\)',
str_operation=str_to_units,
),
Quantity(
'ir_intensity_unit',
r'IR intensities \((\S+)\)',
str_operation=str_to_units,
),
Quantity(
'frequencies',
r'Frequencies\s[\-]{2}\s+(.+)',
dtype=np.float64,
repeats=True,
), # note the mandatory space after the '--'. Use nested strategy if space is optional
Quantity(
'reduced_masses',
r'\n *Red\. masses \-\-\s*(.+)',
str_operation=lambda x: [float(v) for v in x.split()],
r'Red\. masses\s[\-]{2}\s+(.+)',
dtype=np.float64,
repeats=True,
),
), # note the mandatory space after the '--'. Use nested strategy if space is optional
Quantity(
'harmonic_force_constants',
r'Frc consts[\s]{2}[\-]{2}\s+(.+)',
dtype=np.float64,
repeats=True,
), # note the mandatory space after the '--'. Use nested strategy if space is optional
Quantity(
'ir_intensities',
r'IR Inten[\s]{4}[\-]{2}\s+(.+)',
dtype=np.float64,
repeats=True,
), # note the mandatory space after the '--'. Use nested strategy if space is optional
Quantity(
'normal_modes',
r'Atom\s*AN.*\s*([\-\d\s\.]+)',
Expand Down Expand Up @@ -1106,18 +1160,31 @@ def parse_energy_corrections(method, iteration=False):
# vibrational frequencies
frequencies = section.get('frequencies')
if frequencies is not None:
# frequencies in old parsers are in J, not consistent with metainfo
sec_frequencies = x_gaussian_section_frequencies()
sec_run.x_gaussian_section_frequencies.append(sec_frequencies)
sec_frequencies.x_gaussian_frequencies = np.hstack(frequencies)

sec_frequencies.x_gaussian_frequencies = np.hstack(
frequencies
) * section.get('frequency_unit', ureg.cm_1)

reduced_masses = section.get('reduced_masses')
if reduced_masses is not None:
reduced_masses = (
np.array(np.hstack(reduced_masses), dtype=np.float64) * ureg.amu
)
sec_frequencies.x_gaussian_red_masses = reduced_masses.to(
'kg'
).magnitude
sec_frequencies.x_gaussian_red_masses = np.hstack(
reduced_masses
) * section.get('reduced_mass_unit', ureg.amu)

intensities = section.get('ir_intensities')
if intensities is not None:
sec_frequencies.x_gaussian_ir_intensities = np.hstack(
intensities
) * section.get('intensity_unit', ureg.kilometer / ureg.mole)

force_constants = section.get('harmonic_force_constants')
if force_constants is not None:
sec_frequencies.x_gaussian_harmonic_force_constants = np.hstack(
force_constants
) * section.get('force_constant_unit', ureg.mdyne / ureg.angstrom)

normal_modes = section.get('normal_modes')
if normal_modes is not None:
normal_modes = np.hstack(normal_modes)
Expand Down

0 comments on commit 928e933

Please sign in to comment.