Source code for src.initial_geom
#!/usr/bin/env python3
"""
The FinalGeometry class searches for and stores the initial (i.e., input by the
user) geometry data from an ORCA .out file.
"""
__author__ = "Peter Waddell"
__copyright__ = "Copyright 2024"
__credits__ = ["Peter Waddell"]
__version__ = "0.1.0"
__date__ = "2024/02/26"
__maintainer__ = "Peter Waddell"
__email__ = "pmwaddell9@gmail.com"
__status__ = "Prototype"
import re
from orca_data_extraction.src.data_section_with_inputs import DataSectionWithInputs
[docs]
class InitialGeometry(DataSectionWithInputs):
"""
Finds and stores initial geometry data from an ORCA .out file.
Methods
-------
_search
Search the .out file for bond length data.
"""
def __init__(self, out_filename, outfile_contents, inputs):
"""
Parameters
----------
out_filename : str
Name of the ORCA .out file that will be searched.
outfile_contents : str
String containing the full text of the ORCA .out file.
inputs : tuple
Tuple of atom labels (e.g. '2 H') for which input geometry data
will be searched.
"""
super().__init__(out_filename, outfile_contents, inputs)
self._section_name = 'Initial Geometry'
def _search(self, atom_label):
"""
Use regex to search .out file for an atom's initial geom. coordinates.
Meaning, the coordinates that the user input for the calculation. In
the case of single point calculations, these coordinates will be the
same as the final coordinates, but for geometry optimizations they
should be different.
Parameters
----------
atom_label : str
String of the desired atom label.
Returns
-------
dict
A dictionary containing 'x', 'y' and 'z' as keys and the
corresponding initial geometry coordinates (in Å) for the atom
label as values (as strings).
Raises
------
AttributeError
This occurs when the regex fails to find what it is looking
for, and returns NoneType. Then, .group(n) gives this error.
"""
def __convert_str_for_verbose_regex(s):
"""
Converts string to a form that works properly for verbose REs.
Verbose regular expressions ignore whitespace, unless preceded by a
"\" (backslash) character. To use such a string as part of a
verbose RE, this character must be added before each space first;
this function returns a version of the input string modified
in this way.
Parameters
----------
s: str
Input string.
Returns
-------
result: str
A modified version of the input string which now has "\"
preceding each whitespace character.
"""
result = ''
for i in range(len(s)):
if s[i] == ' ':
result = result + r'\ '
else:
result = result + s[i]
return result
def __convert_au_to_angstrom(x):
"""
Converts a value from AU to Ångstroms.
Parameters
----------
x
Value in AU to be converted.
Returns
-------
float
Value of x in Ångstroms.
"""
x = float(x)
return round(x * 0.529177, 5)
re_atom_label = __convert_str_for_verbose_regex(atom_label)
# Here I am assuming that the first occurrence of geometry data in the
# .out file will represent the coordinates which were input by the
# user, for either geometry optimizations or single point calculations.
regex_geom_opt = re.compile(
fr"""
(CARTESIAN\ COORDINATES\ \(A.U.\))
(.*?) # all text until data begins
((\ |\n){re_atom_label})
(\ *) # whitespace
(-?[\d]+[.][\d]+) # ZA, unwanted information here
(\ *)
([\d]+) # FRAG, unwanted information here
(\ *)
(-?[\d]+[.][\d]+) # MASS, unwanted information here
(\ *)
(-?[\d]+[.][\d]+) # X coordinate
(\ *)
(-?[\d]+[.][\d]+) # Y coordinate
(\ *)
(-?[\d]+[.][\d]+) # Z coordinate
(.*?)
(INTERNAL\ COORDINATES)
# above: prevents accidental matches later in the .out file
""",
flags=re.VERBOSE | re.DOTALL
)
try:
result = regex_geom_opt.search(self._outfile_contents)
x = __convert_au_to_angstrom(result.group(12))
y = __convert_au_to_angstrom(result.group(14))
z = __convert_au_to_angstrom(result.group(16))
return {'x': str(x), 'y': str(y), 'z': str(z)}
except AttributeError:
print(f'Error: {atom_label} was not found'
f' in {self._out_filename} (Initial Geometry).')
return {'x': None,
'y': None,
'z': None}