From bcdf771a091d665dca61dfba8fb27667ba35f71b Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Thu, 20 Jun 2024 18:22:50 +0200 Subject: [PATCH 1/7] Fix hyphen elements among frequency list --- electronicparsers/gaussian/parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index 3a0c36b1..5cfb410b 100644 --- a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -391,7 +391,7 @@ def str_to_force_constants(val_in): unit='debye * angstrom**3', ), Quantity( - 'frequencies', r'\n *Frequencies \-\-\s*(.+)', dtype=float, repeats=True + 'frequencies', r'\n *Frequencies \-+\s*(.+)', dtype=float, repeats=True ), Quantity( 'reduced_masses', From 235187b9ad900ff6513ef31c86895a63ce5b7803 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Thu, 20 Jun 2024 18:50:15 +0200 Subject: [PATCH 2/7] Update frequencies and reduced mass regex to capture only the old format --- electronicparsers/gaussian/parser.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index 5cfb410b..24b09977 100644 --- a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -391,11 +391,14 @@ def str_to_force_constants(val_in): unit='debye * angstrom**3', ), Quantity( - 'frequencies', r'\n *Frequencies \-+\s*(.+)', dtype=float, repeats=True + 'frequencies', + r'\n *Frequencies [\-]{2}\s+(.+)', + dtype=float, + repeats=True, ), Quantity( 'reduced_masses', - r'\n *Red\. masses \-\-\s*(.+)', + r'\n *Red\. 
masses [\-]{2}\s+(.+)', str_operation=lambda x: [float(v) for v in x.split()], repeats=True, ), From ed77c2b51705869264dc81971d92caf2ee6431b0 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Thu, 20 Jun 2024 18:53:32 +0200 Subject: [PATCH 3/7] Add note for future devs --- electronicparsers/gaussian/parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index 24b09977..f33cbd83 100644 --- a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -395,13 +395,13 @@ def str_to_force_constants(val_in): r'\n *Frequencies [\-]{2}\s+(.+)', dtype=float, repeats=True, - ), + ), # note the mandatory space after the '--'. Use nested strategy if space is optional Quantity( 'reduced_masses', r'\n *Red\. masses [\-]{2}\s+(.+)', str_operation=lambda x: [float(v) for v in x.split()], repeats=True, - ), + ), # note the mandatory space after the '--'. Use nested strategy if space is optional Quantity( 'normal_modes', r'Atom\s*AN.*\s*([\-\d\s\.]+)', From fb4d18894cc582927bdab8a1916b39462c2d6015 Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Thu, 20 Jun 2024 20:17:25 +0200 Subject: [PATCH 4/7] - Add dynamic extraction of units - Add units to Gaussian schema - Remove double Gaussian frequency quantities --- .../gaussian/metainfo/gaussian.py | 32 ++----------- electronicparsers/gaussian/parser.py | 46 +++++++++++++++---- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/electronicparsers/gaussian/metainfo/gaussian.py b/electronicparsers/gaussian/metainfo/gaussian.py index 2364cbd3..2f1c52d5 100644 --- a/electronicparsers/gaussian/metainfo/gaussian.py +++ b/electronicparsers/gaussian/metainfo/gaussian.py @@ -603,45 +603,23 @@ class x_gaussian_section_frequencies(MSection): validate=False, ) - x_gaussian_frequency_values = Quantity( - type=str, - shape=['number_of_frequency_rows'], - 
description=""" - values of frequencies, in cm-1 - """, - ) - x_gaussian_frequencies = Quantity( type=np.float64, + unit='1/m', shape=['number_of_frequencies'], description=""" - values of frequencies, in cm-1 + values of frequencies """, - ) - - x_gaussian_reduced_masses = Quantity( - type=np.float64, - shape=['number_of_reduced_masses_rows'], - description=""" - values of normal mode reduced masses - """, - ) + ) # only store the '--' header, not '---' x_gaussian_red_masses = Quantity( type=np.float64, + unit='kg', shape=['number_of_frequencies'], description=""" values of normal mode reduced masses """, - ) - - x_gaussian_normal_modes = Quantity( - type=str, - shape=['number_of_normal_modes_rows'], - description=""" - normal mode vectors - """, - ) + ) # only store the '--' header, not '---' x_gaussian_normal_mode_values = Quantity( type=np.float64, diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index f33cbd83..dbe797b3 100644 --- a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -100,6 +100,25 @@ def str_to_force_constants(val_in): fc = fc + fc.T - np.diag(fc.diagonal()) return fc + def str_to_units(unit: str): + """Map native Gaussian units to pint units. 
+ Assumes lower case string input.""" # TODO handle compound units recursively + unit = unit.lower() + if unit == 'cm**-1': + return ureg.cm_1 + elif unit == 'ghz': + return ureg.gigahertz + elif unit == 'kcal/mol': + return ureg.kilocalorie / ureg.mole + elif unit == 'kj/mol': + return ureg.kilojoule / ureg.mole + elif unit == 'j': + return ureg.joule + elif unit == 'amu': + return ureg.amu + else: + raise ValueError(f'Unknown unit {unit}') + orientation_quantities = [ Quantity( 'standard_orientation', @@ -390,6 +409,16 @@ def str_to_force_constants(val_in): dtype=float, unit='debye * angstrom**3', ), + Quantity( + 'frequency_unit', + r'[Hh]armonic frequencies \((\S+)\)', + str_operation=str_to_units, + ), + Quantity( + 'reduced_mass_unit', + r'reduced masses \((\S+)\)', + str_operation=str_to_units, + ), Quantity( 'frequencies', r'\n *Frequencies [\-]{2}\s+(.+)', @@ -1109,18 +1138,19 @@ def parse_energy_corrections(method, iteration=False): # vibrational frequencies frequencies = section.get('frequencies') if frequencies is not None: - # frequencies in old parsers are in J, not consistent with metainfo sec_frequencies = x_gaussian_section_frequencies() sec_run.x_gaussian_section_frequencies.append(sec_frequencies) - sec_frequencies.x_gaussian_frequencies = np.hstack(frequencies) + + sec_frequencies.x_gaussian_frequencies = np.hstack( + frequencies + ) * section.get('frequency_unit', ureg.cm_1) + reduced_masses = section.get('reduced_masses') if reduced_masses is not None: - reduced_masses = ( - np.array(np.hstack(reduced_masses), dtype=np.float64) * ureg.amu - ) - sec_frequencies.x_gaussian_red_masses = reduced_masses.to( - 'kg' - ).magnitude + sec_frequencies.x_gaussian_red_masses = np.hstack( + reduced_masses + ) * section.get('reduced_mass_unit', ureg.amu) + normal_modes = section.get('normal_modes') if normal_modes is not None: normal_modes = np.hstack(normal_modes) From 69aace1c848f8aebf406cdb0cc8fa329fbfad616 Mon Sep 17 00:00:00 2001 From: Nathan Daelman 
Date: Fri, 21 Jun 2024 10:46:17 +0200 Subject: [PATCH 5/7] Apply reviewer feedback: use `dict` instead of `if-else` in `str_to_units` --- electronicparsers/gaussian/parser.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index dbe797b3..fde77a09 100644 --- a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -100,23 +100,21 @@ def str_to_force_constants(val_in): fc = fc + fc.T - np.diag(fc.diagonal()) return fc - def str_to_units(unit: str): + def str_to_units(unit: str) -> ureg.Unit: """Map native Gaussian units to pint units. Assumes lower case string input.""" # TODO handle compound units recursively + unit_map: dict[str, ureg.Unit] = { + 'cm**-1': ureg.cm_1, + 'ghz': ureg.gigahertz, + 'kcal/mol': ureg.kilocalorie / ureg.mole, + 'kj/mol': ureg.kilojoule / ureg.mole, + 'j': ureg.joule, + 'amu': ureg.amu, + } unit = unit.lower() - if unit == 'cm**-1': - return ureg.cm_1 - elif unit == 'ghz': - return ureg.gigahertz - elif unit == 'kcal/mol': - return ureg.kilocalorie / ureg.mole - elif unit == 'kj/mol': - return ureg.kilojoule / ureg.mole - elif unit == 'j': - return ureg.joule - elif unit == 'amu': - return ureg.amu - else: + try: + return unit_map[unit] + except KeyError: raise ValueError(f'Unknown unit {unit}') orientation_quantities = [ From bab90b8d0286fd736aadfc050c2006eda597237c Mon Sep 17 00:00:00 2001 From: Nathan Daelman Date: Fri, 21 Jun 2024 10:53:20 +0200 Subject: [PATCH 6/7] First extension of vibrational quantity extraction --- electronicparsers/gaussian/metainfo/gaussian.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/electronicparsers/gaussian/metainfo/gaussian.py b/electronicparsers/gaussian/metainfo/gaussian.py index 2f1c52d5..b70579e1 100644 --- a/electronicparsers/gaussian/metainfo/gaussian.py +++ b/electronicparsers/gaussian/metainfo/gaussian.py @@ -629,6 +629,22 @@ 
class x_gaussian_section_frequencies(MSection): """, ) + x_gaussian_ir_intensities = Quantity( + type=np.float64, + shape=['number_of_frequencies'], + description=""" + infra-red intensities + """, + ) + + x_gaussian_harmonic_force_constants = Quantity( + type=np.float64, + shape=['number_of_atoms', 'number_of_atoms', 'number_of_frequencies'], + description=""" + values of harmonic force constants + """, + ) + class x_gaussian_section_thermochem(MSection): """ From 875ec13a101843d2218fa13871535a4af574114b Mon Sep 17 00:00:00 2001 From: "nathan.daelman@physik.hu-berlin.de" Date: Fri, 21 Jun 2024 18:46:45 +0200 Subject: [PATCH 7/7] - Add IR intensities and harmonic force constants (+ their dynamic units) - Clean up frequency section regexes --- .../gaussian/metainfo/gaussian.py | 12 ++--- electronicparsers/gaussian/parser.py | 44 +++++++++++++++++-- 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/electronicparsers/gaussian/metainfo/gaussian.py b/electronicparsers/gaussian/metainfo/gaussian.py index b70579e1..858865a6 100644 --- a/electronicparsers/gaussian/metainfo/gaussian.py +++ b/electronicparsers/gaussian/metainfo/gaussian.py @@ -629,19 +629,21 @@ class x_gaussian_section_frequencies(MSection): """, ) - x_gaussian_ir_intensities = Quantity( + x_gaussian_harmonic_force_constants = Quantity( type=np.float64, + unit='newton/meter', shape=['number_of_frequencies'], description=""" - infra-red intensities + values of harmonic force constants """, ) - x_gaussian_harmonic_force_constants = Quantity( + x_gaussian_ir_intensities = Quantity( type=np.float64, - shape=['number_of_atoms', 'number_of_atoms', 'number_of_frequencies'], + unit='meter/mol', + shape=['number_of_frequencies'], description=""" - values of harmonic force constants + infra-red intensities, integrated over their path length """, ) diff --git a/electronicparsers/gaussian/parser.py b/electronicparsers/gaussian/parser.py index fde77a09..269bdd0b 100644 --- 
a/electronicparsers/gaussian/parser.py +++ b/electronicparsers/gaussian/parser.py @@ -110,6 +110,8 @@ def str_to_units(unit: str) -> ureg.Unit: 'kj/mol': ureg.kilojoule / ureg.mole, 'j': ureg.joule, 'amu': ureg.amu, + 'km/mol': ureg.kilometer / ureg.mole, # NOTE(review): confirm the spelling Gaussian prints for this unit (possibly 'KM/Mole'); unmapped strings raise ValueError + 'mdyne/a': ureg.millidyne / ureg.angstrom, # lower-case key: the lookup runs on unit.lower() } unit = unit.lower() try: @@ -417,16 +419,38 @@ def str_to_units(unit: str) -> ureg.Unit: r'reduced masses \((\S+)\)', str_operation=str_to_units, ), + Quantity( + 'harmonic_force_constant_unit', + r'force constants \((\S+)\)', + str_operation=str_to_units, + ), + Quantity( + 'ir_intensity_unit', + r'IR intensities \((\S+)\)', + str_operation=str_to_units, + ), Quantity( 'frequencies', - r'\n *Frequencies [\-]{2}\s+(.+)', - dtype=float, + r'Frequencies\s[\-]{2}\s+(.+)', + dtype=np.float64, repeats=True, ), # note the mandatory space after the '--'. Use nested strategy if space is optional Quantity( 'reduced_masses', - r'\n *Red\. masses [\-]{2}\s+(.+)', - str_operation=lambda x: [float(v) for v in x.split()], + r'Red\. masses\s[\-]{2}\s+(.+)', + dtype=np.float64, + repeats=True, + ), # note the mandatory space after the '--'. Use nested strategy if space is optional + Quantity( + 'harmonic_force_constants', + r'Frc consts[\s]{2}[\-]{2}\s+(.+)', + dtype=np.float64, + repeats=True, + ), # note the mandatory space after the '--'. Use nested strategy if space is optional + Quantity( + 'ir_intensities', + r'IR Inten[\s]{4}[\-]{2}\s+(.+)', + dtype=np.float64, repeats=True, ), # note the mandatory space after the '--'. 
Use nested strategy if space is optional Quantity( @@ -1149,6 +1173,18 @@ def parse_energy_corrections(method, iteration=False): reduced_masses ) * section.get('reduced_mass_unit', ureg.amu) + intensities = section.get('ir_intensities') + if intensities is not None: + sec_frequencies.x_gaussian_ir_intensities = np.hstack( + intensities + ) * section.get('ir_intensity_unit', ureg.kilometer / ureg.mole) # key is the name of the Quantity that extracts the unit + + force_constants = section.get('harmonic_force_constants') + if force_constants is not None: + sec_frequencies.x_gaussian_harmonic_force_constants = np.hstack( + force_constants + ) * section.get('harmonic_force_constant_unit', ureg.millidyne / ureg.angstrom) # key is the name of the Quantity that extracts the unit + normal_modes = section.get('normal_modes') if normal_modes is not None: normal_modes = np.hstack(normal_modes)