Source code for src.initial_geom

#!/usr/bin/env python3
"""
The FinalGeometry class searches for and stores the initial (i.e., input by the
user) geometry data from an ORCA .out file.
"""
__author__ = "Peter Waddell"
__copyright__ = "Copyright 2024"
__credits__ = ["Peter Waddell"]
__version__ = "0.1.0"
__date__ = "2024/02/26"
__maintainer__ = "Peter Waddell"
__email__ = "pmwaddell9@gmail.com"
__status__ = "Prototype"

import re

from orca_data_extraction.src.data_section_with_inputs import DataSectionWithInputs



[docs]
class InitialGeometry(DataSectionWithInputs):
    """
    Finds and stores initial geometry data from an ORCA .out file.

    Methods
    -------
    _search
        Search the .out file for bond length data.
    """
    def __init__(self, out_filename, outfile_contents, inputs):
        """
        Parameters
        ----------
        out_filename : str
            Name of the ORCA .out file that will be searched.
        outfile_contents : str
            String containing the full text of the ORCA .out file.
        inputs : tuple
            Tuple of atom labels (e.g. '2 H') for which input geometry data
            will be searched.
        """
        super().__init__(out_filename, outfile_contents, inputs)
        self._section_name = 'Initial Geometry'

    def _search(self, atom_label):
        """
        Use regex to search .out file for an atom's initial geom. coordinates.

        Meaning, the coordinates that the user input for the calculation. In
        the case of single point calculations, these coordinates will be the
        same as the final coordinates, but for geometry optimizations they
        should be different.

        Parameters
        ----------
        atom_label : str
            String of the desired atom label.

        Returns
        -------
        dict
            A dictionary containing 'x', 'y' and 'z' as keys and the
            corresponding initial geometry coordinates (in Å) for the atom
            label as values (as strings).

        Raises
        ------
        AttributeError
            This occurs when the regex fails to find what it is looking
            for, and returns NoneType. Then, .group(n) gives this error.
        """
        def __convert_str_for_verbose_regex(s):
            """
            Converts string to a form that works properly for verbose REs.

            Verbose regular expressions ignore whitespace, unless preceded by a
            "\" (backslash) character. To use such a string as part of a
            verbose RE, this character must be added before each space first;
            this function returns a version of the input string modified
            in this way.

            Parameters
            ----------
            s: str
                Input string.

            Returns
            -------
            result: str
                A modified version of the input string which now has "\"
                preceding each whitespace character.
            """
            result = ''
            for i in range(len(s)):
                if s[i] == ' ':
                    result = result + r'\ '
                else:
                    result = result + s[i]
            return result

        def __convert_au_to_angstrom(x):
            """
            Converts a value from AU to Ångstroms.

            Parameters
            ----------
            x
                Value in AU to be converted.

            Returns
            -------
            float
                Value of x in Ångstroms.
            """
            x = float(x)
            return round(x * 0.529177, 5)

        re_atom_label = __convert_str_for_verbose_regex(atom_label)
        # Here I am assuming that the first occurrence of geometry data in the
        # .out file will represent the coordinates which were input by the
        # user, for either geometry optimizations or single point calculations.
        regex_geom_opt = re.compile(
            fr"""
            (CARTESIAN\ COORDINATES\ \(A.U.\))
            (.*?)               # all text until data begins
            ((\ |\n){re_atom_label})
            (\ *)               # whitespace
            (-?[\d]+[.][\d]+)   # ZA, unwanted information here
            (\ *)
            ([\d]+)             # FRAG, unwanted information here
            (\ *)
            (-?[\d]+[.][\d]+)   # MASS, unwanted information here
            (\ *)
            (-?[\d]+[.][\d]+)   # X coordinate
            (\ *)
            (-?[\d]+[.][\d]+)   # Y coordinate
            (\ *)
            (-?[\d]+[.][\d]+)   # Z coordinate
            (.*?)
            (INTERNAL\ COORDINATES)
            # above: prevents accidental matches later in the .out file
            """,
            flags=re.VERBOSE | re.DOTALL
        )
        try:
            result = regex_geom_opt.search(self._outfile_contents)
            x = __convert_au_to_angstrom(result.group(12))
            y = __convert_au_to_angstrom(result.group(14))
            z = __convert_au_to_angstrom(result.group(16))
            return {'x': str(x), 'y': str(y), 'z': str(z)}
        except AttributeError:
            print(f'Error: {atom_label} was not found'
                  f' in {self._out_filename} (Initial Geometry).')
            return {'x': None,
                    'y': None,
                    'z': None}
Source code for src.initial_geom

orca-data-extraction

Navigation

Related Topics