Source code for src.homo_lumo_energies
#!/usr/bin/env python3
"""
The HOMOLUMOEnergies class searches for and stores the HOMO and LUMO energy
data from an ORCA .out file.
"""
__author__ = "Peter Waddell"
__copyright__ = "Copyright 2024"
__credits__ = ["Peter Waddell"]
__version__ = "0.1.0"
__date__ = "2024/02/26"
__maintainer__ = "Peter Waddell"
__email__ = "pmwaddell9@gmail.com"
__status__ = "Prototype"
import re
from orca_data_extraction.src.data_section import DataSection
[docs]
class HOMOLUMOEnergies(DataSection):
"""
Finds and stores HOMO and LUMO energy data from a ORCA .out file.
Attributes
----------
__regex : str
Regular expression string used to search the .out file for the
HOMO and LUMO energy data.
Methods
-------
_find_data
Search the .out file for HOMO and LUMO energy data, return as dict.
"""
def __init__(self, out_filename, outfile_contents):
"""
Parameters
----------
out_filename : str
Name of the ORCA .out file that will be searched.
outfile_contents : str
String containing the full text of the ORCA .out file.
"""
# Here I am assuming that the last occurrence of the MO energy data in
# the .out file will be from the finished calculation, seems logical...
# However, I found that in order to find the last occurrence without
# catastrophic backtracking, it was necessary to reverse the line order
# of the outfile contents string, then match the first occurrence there.
# Note that, consequently, this regex string is INVERTED to match!!!
# Note also: \ must be used for all whitespace I want to count
# when using verbose regular expressions.
self.__regex = re.compile(
r"""
(MULLIKEN\ POPULATION\ ANALYSIS)
(.*?) # all text until data begins
(0.0000) # LUMO occupancy, always 0 of course
(\ *) # whitespace
(-?[\d]+[.][\d]+) # LUMO energy in Eh
(\ *)
(-?[\d]+[.][\d]+) # LUMO energy in eV
(\ \n) # newline
(\ *)
([\d]+) # HOMO orbital number
(\ \ \ )
(1|2) # HOMO occupancy
(.0000)
(\ *)
(-?[\d]+[.][\d]+) # HOMO energy in Eh
(\ *)
(-?[\d]+[.][\d]+) # HOMO energy in eV
(.*?)
(ORBITAL\ ENERGIES)
# above: prevents accidental matches later in the .out file
""",
flags=re.VERBOSE | re.DOTALL
)
super().__init__(out_filename, outfile_contents)
self._section_name = 'HOMO LUMO Energies'
def _find_data(self):
"""
Search the .out file for HOMO and LUMO energy data, return as dict.
Returns
-------
dict
Dictionary containing HOMO/LUMO energy as keys and the
corresponding energy values (in eV) as values (as strings).
Raises
------
AttributeError
This occurs when the regex fails to find what it is looking
for, and returns NoneType. Then, .group(n) gives this error.
"""
def __reverse_string_by_lines(s):
"""
Reverses a string in terms of the order of its lines.
Parameters
----------
s: str
String to be reversed line-by-line
Returns
-------
str
Reversed string, line-by-line.
"""
s = s.splitlines()[::-1]
return '\n'.join(s)
# See above comments: in order to match the LAST occurrence of the MO
# energy data in the .out file, the contents must be reversed.
reversed_contents = \
__reverse_string_by_lines(self._outfile_contents)
try:
result = self.__regex.search(reversed_contents)
homo_energy = result.group(17)
lumo_energy = result.group(7)
return {'HOMO energy': homo_energy, 'LUMO energy': lumo_energy}
except AttributeError:
print(f'HOMO/LUMO energy data not found in '
f'{self._out_filename}')
return {'HOMO energy': None, 'LUMO energy': None}