smamp.charges_to_rtp
module
- Transfer Charges from CSV table to .rft file
- Copyright 2019 Simulation Lab
- University of Freiburg
Author
:Lukas
Elflein
elfleinl@cs.uni-freiburg.de
Source code
""" Transfer Charges from CSV table to .rft file
Copyright 2019 Simulation Lab
University of Freiburg
Author: Lukas Elflein <elfleinl@cs.uni-freiburg.de>
"""
import pandas as pd
import argparse
import shutil
def import_charges(csv_path):
"""
Read and filter the best-fit charges.
Args:
csv_path: path to the charge table
Return:
charges: pandas DataFrame with atom names, residue names, charges
"""
charges = pd.read_csv(csv_path)
# select atom names, the residue and q column
charges = charges[['atom', 'residue', 'q']]
print('Charge table sucessfully imported.')
return charges
def parse_rft(rtp_path, charges=None):
"""
Substitute the fitted charges for the original charges in the RTF file.
Args:
rtp_path: A string containing the original GROMACS topology file path
charges: A pandas DataFrame containing the best fit charges
Returns:
fitted_rtp_text: The original topoly file, but with updated charges
"""
with open(rtp_path, 'r') as rtp_file:
print('Successfully loaded topolgy file {}'.format(rtp_path))
rtp_text = rtp_file.readlines()
# save all possible atom names
atom_names = charges.atom.unique()
# and residuum names
residuum_names = charges.residue.unique()
# We will append all modified lines to sub_text
sub_text = ''
# Keep track of how often we substituted for debugging
nr_substitutions = 0
current_residuum = None
for line in rtp_text:
# Atom names are only unique inside one residuum
# Thus, specify which res we are currently in
for residuum in residuum_names:
if residuum in line:
current_residuum = residuum
break
# Now, we can look up the atom name in the charge table.
# First, select the lines with exactly one atom name
for atom_name in atom_names:
# Select lines with at least one atom name
if atom_name in line[0:7]:
second_entry = line[8:18].replace('+', '')
second_entry = second_entry.replace('-', '').strip()
# Select lines with no atom name in second column
if not second_entry in atom_names:
# Now we can substitute the charge with the fitted one
line = substitute(line, charges, current_residuum)
nr_substitutions += 1
break
# We substituted the charge, if applicable.
# Now we can append the line to the text file
sub_text += line
# Make sure that all substitutions were executed
assert len(charges.index) == nr_substitutions
print('{} of {} charges substituted.'.format(nr_substitutions, len(charges.index)))
return sub_text
def substitute(line, charges, current_residuum):
"""
Look up the charge for an atom, and write into a string.
Args:
line: String containing an atom name, residuum name, and original charge.
charges: DataFrame containing atom name, residuum names, new charges.
Returns:
line: the original line, with the charge updated from the `charges` table.
"""
current_atom = line[0:7].strip()
# look up the current atom name in the residuum we are currently in
mask = (charges.atom == current_atom) & (charges.residue == current_residuum)
# Exract the charge q
new_charge = charges[mask].q.values[0]
charge_string = '{: 1.6f}'.format(new_charge) + ' '
modified_line = line[:24] + charge_string + line[34:]
return modified_line
def export_rft(text, out_path):
"""
Write the modified text to a file.
"""
with open(out_path, 'w') as outfile:
outfile.write(text)
print('Modified topoly file written to {}'.format(out_path))
def cmd_parser():
parser = argparse.ArgumentParser(prog='',
description='Transfer charges from a csv file to an .rtp file')
parser.add_argument('-rtp',
help='The location of the GROMACS topology file (.rtp)',
default='./n7nh2.rtp', metavar='./n7nh2.rtp')
parser.add_argument('-csv', metavar='./charges.csv',
help='The location of the atomname-charge table, in the .csv format.',
default='./average_cost_function_check/fitted_points_charges.csv')
parser.add_argument('-out', metavar='./modified.rtp',
help='The location where the modified rtp file should be saved.',
default='modified.rtp')
args = parser.parse_args()
return args.rtp, args.csv, args.out
def main():
"""
Run the script.
"""
rtp_path, csv_path, out_path = cmd_parser()
charges = import_charges(csv_path)
sub_text = parse_rft(rtp_path=rtp_path, charges=charges)
export_rft(text=sub_text, out_path=out_path)
print('Done.')
if __name__ == '__main__':
main()
Functions
def cmd_parser()
-
Source code
def cmd_parser(): parser = argparse.ArgumentParser(prog='', description='Transfer charges from a csv file to an .rtp file') parser.add_argument('-rtp', help='The location of the GROMACS topology file (.rtp)', default='./n7nh2.rtp', metavar='./n7nh2.rtp') parser.add_argument('-csv', metavar='./charges.csv', help='The location of the atomname-charge table, in the .csv format.', default='./average_cost_function_check/fitted_points_charges.csv') parser.add_argument('-out', metavar='./modified.rtp', help='The location where the modified rtp file should be saved.', default='modified.rtp') args = parser.parse_args() return args.rtp, args.csv, args.out
def export_rft(text, out_path)
-
Write the modified text to a file.
Source code
def export_rft(text, out_path): """ Write the modified text to a file. """ with open(out_path, 'w') as outfile: outfile.write(text) print('Modified topoly file written to {}'.format(out_path))
def import_charges(csv_path)
-
Read and filter the best-fit charges.
- Args:
csv_path
:path
to
the
charge
table
- Return:
charges
:pandas
DataFrame
with
atom
names
,residue
names
,charges
Source code
def import_charges(csv_path): """ Read and filter the best-fit charges. Args: csv_path: path to the charge table Return: charges: pandas DataFrame with atom names, residue names, charges """ charges = pd.read_csv(csv_path) # select atom names, the residue and q column charges = charges[['atom', 'residue', 'q']] print('Charge table sucessfully imported.') return charges
def main()
-
Run the script.
Source code
def main(): """ Run the script. """ rtp_path, csv_path, out_path = cmd_parser() charges = import_charges(csv_path) sub_text = parse_rft(rtp_path=rtp_path, charges=charges) export_rft(text=sub_text, out_path=out_path) print('Done.')
def parse_rft(rtp_path, charges=None)
-
Substitute the fitted charges for the original charges in the RTF file.
- Args:
rtp_path
:A
string
containing
the
original
GROMACS
topology
file
path
charges
:A
pandas
DataFrame
containing
the
best
fit
charges
- Returns:
fitted_rtp_text
:The
original
topoly
file
,but
with
updated
charges
Source code
def parse_rft(rtp_path, charges=None): """ Substitute the fitted charges for the original charges in the RTF file. Args: rtp_path: A string containing the original GROMACS topology file path charges: A pandas DataFrame containing the best fit charges Returns: fitted_rtp_text: The original topoly file, but with updated charges """ with open(rtp_path, 'r') as rtp_file: print('Successfully loaded topolgy file {}'.format(rtp_path)) rtp_text = rtp_file.readlines() # save all possible atom names atom_names = charges.atom.unique() # and residuum names residuum_names = charges.residue.unique() # We will append all modified lines to sub_text sub_text = '' # Keep track of how often we substituted for debugging nr_substitutions = 0 current_residuum = None for line in rtp_text: # Atom names are only unique inside one residuum # Thus, specify which res we are currently in for residuum in residuum_names: if residuum in line: current_residuum = residuum break # Now, we can look up the atom name in the charge table. # First, select the lines with exactly one atom name for atom_name in atom_names: # Select lines with at least one atom name if atom_name in line[0:7]: second_entry = line[8:18].replace('+', '') second_entry = second_entry.replace('-', '').strip() # Select lines with no atom name in second column if not second_entry in atom_names: # Now we can substitute the charge with the fitted one line = substitute(line, charges, current_residuum) nr_substitutions += 1 break # We substituted the charge, if applicable. # Now we can append the line to the text file sub_text += line # Make sure that all substitutions were executed assert len(charges.index) == nr_substitutions print('{} of {} charges substituted.'.format(nr_substitutions, len(charges.index))) return sub_text
def substitute(line, charges, current_residuum)
-
Look up the charge for an atom, and write into a string.
Args: line: String containing an atom name, residuum name, and original charge. charges: DataFrame containing atom name, residuum names, new charges.
Returns: line: the original line, with the charge updated from the
charges
table.Source code
def substitute(line, charges, current_residuum): """ Look up the charge for an atom, and write into a string. Args: line: String containing an atom name, residuum name, and original charge. charges: DataFrame containing atom name, residuum names, new charges. Returns: line: the original line, with the charge updated from the `charges` table. """ current_atom = line[0:7].strip() # look up the current atom name in the residuum we are currently in mask = (charges.atom == current_atom) & (charges.residue == current_residuum) # Exract the charge q new_charge = charges[mask].q.values[0] charge_string = '{: 1.6f}'.format(new_charge) + ' ' modified_line = line[:24] + charge_string + line[34:] return modified_line