Source code for src.orca_out_to_json
#!/usr/bin/env python3
"""
A script to quickly pull desired data from an ORCA .out file and compile
it into a JSON file.
Before running, the user should specify what information they want to look for
in a .txt file (see example). When executed, the script checks each file in the
working directory. If the file ends in .out, it exports the desired data into
a JSON file.
"""
__author__ = "Peter Waddell"
__copyright__ = "Copyright 2024"
__credits__ = ["Peter Waddell"]
__version__ = "0.1.0"
__date__ = "2024/03/01"
__maintainer__ = "Peter Waddell"
__email__ = "pmwaddell9@gmail.com"
__status__ = "Prototype"
import os
import json
import sys
from orca_data_extraction.src.structure_data_builder import StructureDataBuilder
[docs]
def make_json_list(sd_list):
    """
    Converts the data in a list of StructureData instances to a list for JSON.

    Parameters
    ----------
    sd_list : list
        List containing the set of StructureData instances that each come
        from the ORCA .out files.

    Returns
    -------
    list
        List with one dict per entry of sd_list. Each dict holds the input
        and .out filenames plus one nested dict per data section, configured
        to be compatible with JSON (i.e., tuples are converted to strings).
    """
    def format_column_name(x):
        """
        Format string for use as a column name in the JSON file.

        Parameters
        ----------
        x : str or other
            Entity to be potentially renamed.

        Returns
        -------
        str or other
            A string formatted for use as column name in the JSON file
            (quotes removed, ', ' collapsed to ',', spaces replaced by
            underscores, lower-cased), or the object unchanged if it is not
            a string (e.g., a dict).
        """
        if not isinstance(x, str):
            return x
        return (
            x.replace("'", "")
            .replace(', ', ',')
            .replace(' ', '_')
            .lower()
        )

    def to_json_safe(x):
        """
        Convert a tuple to its string form, then apply format_column_name.

        isinstance is used (rather than an exact type comparison) so that
        tuple subclasses such as namedtuples are converted as well; json
        cannot serialize any kind of tuple as a dict key.
        """
        if isinstance(x, tuple):
            x = str(x)
        return format_column_name(x)

    json_lst = []
    for sd in sd_list:
        sd_data = {
            'script_input_filename': sd.get_input_filename(),
            'orca_out_filename': sd.get_out_filename(),
        }
        for data_section in sd.get_data_sections().values():
            # Both keys and values are made JSON-safe and normalized.
            json_safe_data = {
                to_json_safe(key): to_json_safe(val)
                for key, val in data_section.get_data().items()
            }
            section_key = (
                data_section.get_section_name().replace(' ', '_').lower()
            )
            sd_data[section_key] = json_safe_data
        json_lst.append(sd_data)
    return json_lst
[docs]
def create_json_from_sds(sd_list, json_name):
    """
    Writes the data in a list of StructureData instances to a JSON file.

    The list is first converted with make_json_list and then dumped, with
    an indent of 2, to a file named "<json_name>.json".

    Parameters
    ----------
    sd_list : list
        List containing the set of StructureData instances that each come
        from the ORCA .out files.
    json_name : str
        Name of the JSON file where the data will be stored, without the
        ".json" extension (it is appended here).
    """
    # Convert before opening so a conversion failure never touches the file.
    records = make_json_list(sd_list)
    target = f'{json_name}.json'
    with open(target, 'w') as json_file:
        json.dump(records, json_file, indent=2)
[docs]
def main():
    """
    Collect data from every ORCA .out file in the working directory as JSON.

    The name of the inputs file is taken from the first command line
    argument and, optionally, the name of the JSON file (without the
    ".json" extension) from the second. Whatever is not given on the
    command line is asked for interactively; entering "q" at a prompt
    exits the script. Each file in the current working directory whose
    name ends in ".out" is passed to a StructureDataBuilder and the
    collected results are written with create_json_from_sds.
    """
    # TODO: use an argument parser here instead? make argument inputs more sophisticated?
    json_name = ''

    # Process command line arguments
    # TODO: extract this part for each file type?? let the user select the file type @ command line?
    if len(sys.argv) >= 2:
        inputs_name = sys.argv[1]
        if not os.path.isfile(inputs_name):
            print('No file with name ' + inputs_name + ' found.')
            sys.exit()
        if len(sys.argv) >= 3:
            json_name = sys.argv[2]
    else:
        print('Script will execute on all .out files in the current '
              'working directory.')
        while True:
            print('Name of input file with atom labels ("q" to quit): ',
                  end='')
            inputs_name = input()
            if inputs_name == 'q':
                sys.exit()
            # A relative name is resolved against the working directory, so
            # this works on every platform (no hard-coded "\\" separator)
            # and matches the check used for the command line argument.
            if os.path.isfile(inputs_name):
                break
            print('No file with name ' + inputs_name + ' found.')

    # Ask for the JSON file name if it was not given on the command line
    if json_name == '':
        print('Name of the JSON file which will contain the data (press ENTER '
              'to use the default name, "q" to quit): ', end='')
        json_name = input()
        if json_name == 'q':
            sys.exit()
        # If the user just hits enter, use default name:
        if json_name == '':
            # Strip the directory and whatever extension the inputs file
            # has, rather than assuming a 4-character suffix.
            inputs_stem = os.path.splitext(os.path.basename(inputs_name))[0]
            json_name = f'ORCA_data_{inputs_stem}'
    print('')

    sd_list = []
    structure_data_builder = StructureDataBuilder(inputs_name)
    for f in os.listdir(os.getcwd()):
        if not (os.path.isfile(f) and f.endswith('.out')):
            continue
        try:
            print(f'Beginning search: {f}')
            sd_list.append(structure_data_builder.build(f))
            print(f'{f} complete.\n')
        except IndexError:
            # Best effort: report the problem file and carry on with the rest.
            print(f'Something went wrong with {f} and it threw '
                  f'an IndexError...\n')

    create_json_from_sds(sd_list, json_name)
    print(f'Process complete! Results saved as "{json_name}.json"')
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()