Source code for src.final_geom
#!/usr/bin/env python3
"""
The FinalGeometry class searches for and stores the final structure geometry
data (i.e., after geometry optimization is complete) from an ORCA .out file.
"""
# Module-level metadata: authorship, copyright, version, release date,
# maintainer contact and development status of this module.
__author__ = "Peter Waddell"
__copyright__ = "Copyright 2024"
__credits__ = ["Peter Waddell"]
__version__ = "0.1.0"
__date__ = "2024/02/26"
__maintainer__ = "Peter Waddell"
__email__ = "pmwaddell9@gmail.com"
__status__ = "Prototype"
import re
from orca_data_extraction.src.data_section_with_inputs import DataSectionWithInputs
[docs]
class FinalGeometry(DataSectionWithInputs):
    """
    Finds and stores the final geometry data from an ORCA .out file.

    Methods
    -------
    _search
        Search the .out file for an atom's final geometry coordinates.
    """

    # Factor used to convert lengths in atomic units (Bohr) to Ångstroms.
    _AU_TO_ANGSTROM = 0.529177

    def __init__(self, out_filename, outfile_contents, inputs):
        """
        Parameters
        ----------
        out_filename : str
            Name of the ORCA .out file that will be searched.
        outfile_contents : str
            String containing the full text of the ORCA .out file.
        inputs : tuple
            Tuple of atom labels (e.g. '2 H') for which final geometry data
            will be searched.
        """
        super().__init__(out_filename, outfile_contents, inputs)
        self._section_name = 'Final Geometry'

    def _search(self, atom_label):
        """
        Use regex to search .out file for an atom's final geom. coordinates.

        The coordinates are read from the last table in the file that sits
        between a "CARTESIAN COORDINATES (A.U.)" header and an
        "INTERNAL COORDINATES (ANGSTROEM)" header, and are converted from
        atomic units to Ångstroms (rounded to 5 decimal places).

        Parameters
        ----------
        atom_label : str
            String of the desired atom label (e.g. '2 H').

        Returns
        -------
        dict
            A dictionary containing 'x', 'y' and 'z' as keys and the
            corresponding final geometry coordinates (in Å) for the atom label
            as values (as strings). If the atom label cannot be found, an
            error message is printed and every value is None instead.
        """
        # re.escape backslash-escapes whitespace (which a verbose regex would
        # otherwise ignore) as well as any regex metacharacters, so the label
        # is always matched literally.
        escaped_label = re.escape(atom_label)

        # The last occurrence of geometry data in the .out file is assumed to
        # represent the finished calculation. To find that last occurrence
        # without catastrophic backtracking, the line order of the file is
        # reversed and the FIRST occurrence is matched there instead.
        reversed_contents = '\n'.join(
            reversed(self._outfile_contents.splitlines())
        )

        # Because the line order is reversed, the section headers appear in
        # the pattern in the opposite order to the .out file itself. Each
        # individual line still reads left to right.
        # Note: "\ " must be used for every literal space in a verbose regex.
        regex_geom_opt = re.compile(
            fr"""
            INTERNAL\ COORDINATES\ \(ANGSTROEM\)
            .*?                     # all text until data begins
            (?:\ |\n){escaped_label}
            # above: the leading space/newline prevents '1 H' from matching
            # e.g. '11 H' when '1 H' is not present
            \ *
            -?\d+\.\d+              # ZA, unwanted information here
            \ *
            \d+                     # FRAG, unwanted information here
            \ *
            -?\d+\.\d+              # MASS, unwanted information here
            \ *
            (?P<x>-?\d+\.\d+)       # X coordinate
            \ *
            (?P<y>-?\d+\.\d+)       # Y coordinate
            \ *
            (?P<z>-?\d+\.\d+)       # Z coordinate
            .*?
            CARTESIAN\ COORDINATES\ \(A\.U\.\)
            # above: prevents accidental matches later in the .out file
            """,
            flags=re.VERBOSE | re.DOTALL
        )

        match = regex_geom_opt.search(reversed_contents)
        if match is None:
            print(f'Error: {atom_label} was not found'
                  f' in {self._out_filename} (Final Geometry).')
            return {'x': None,
                    'y': None,
                    'z': None}

        return {
            axis: str(round(float(match.group(axis))
                            * self._AU_TO_ANGSTROM, 5))
            for axis in ('x', 'y', 'z')
        }