smamp.convert_UA_to_AA
module
- Change structure with implicit Hydrogen to one with explicitely defined H-atoms.
- Copyright 2019 Simulation Lab
- University of Freiburg
Author
:Johannes
Hoermann
johannes.hoermann@imtek.uni-freiburg.deModified
:Lukas
Elflein
elfleinl@cs.uni-freiburg.de
Source code
""" Change structure with implicit Hydrogen to one with explicitely defined H-atoms.
Copyright 2019 Simulation Lab
University of Freiburg
Author: Johannes Hoermann <johannes.hoermann@imtek.uni-freiburg.de>
Modified: Lukas Elflein <elfleinl@cs.uni-freiburg.de>
"""
import os
import ase.io
import sys
import warnings
import numpy as np
import parmed as pmd
from ase.data import atomic_numbers
from ase.neighborlist import NeighborList
from matscipy.neighbours import neighbour_list
from parmed import gromacs
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None):
"""Inserts explicit hydrogen atoms into ASE structure."""
# make copies of passed structures as not to alter originals:
new_pmd_top = pmd_top.copy(pmd.Structure)
new_ase_struct = ase_struct.copy()
# make copied atoms accessible by unchangable indices (standard list)
originalAtoms = [a for a in new_pmd_top.atoms]
implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k])
for a in pmd_top.atoms
for k in implicitHbondingPartners.keys()
if a.name == k]
implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples)
# build numbered neighbour list "manually"
#i: list of atoms e.g. [0,0,0,1,1,2,2,...]
i = np.array([b.atom1.idx for b in new_pmd_top.bonds])
#j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc.
j = np.array([b.atom2.idx for b in new_pmd_top.bonds])
r = new_ase_struct.positions
for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to
logfile.write('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k))
for h in range(0,Hno):
r = new_ase_struct.positions
bondingPartners = j[i==k]
logfile.write('bondingPartners'.join(str(bondingPartners)))
partnerStr=''
for p in bondingPartners:
if partnerStr == '':
partnerStr = originalAtoms[p].name
else:
partnerStr += ', ' + originalAtoms[p].name
logfile.write('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr))
dr = (r[j[i==k]] - r[k]).mean(axis=0)
# my understanding: dr is vector
# from atom k's position towards the geometrical center of mass
# it forms with its defined neighbours
# r0 is a vector offset into the opposit direction:
dr = dr/np.linalg.norm(dr) #normalized vector in direction dr
#calculate an orthogonal vector 'dr_ortho' on dr
#and push the H atoms in dr+dr_ortho and dr-dr_ortho
dr_ortho = np.cross(dr,np.array([1,0,0]))
if np.linalg.norm(dr_ortho) < 0.1 : #if dr and (1,0,0) have almost the same direction
dr_ortho = np.cross(dr,np.array([0,1,0]))
# (1-2*h) = {1,-1} for h={0,1}
h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho)
r0 = r[k] - bond_length*h_pos_vec
new_ase_struct += ase.Atom('H', r0) # add atom in ase structure
n_atoms = len(new_ase_struct)
#introduce a corrector step for a added atom which is too close to others
#do as many corrector stepps until all atoms are more than 1\AA appart
c_step = 0
while True:
nl = NeighborList(cutoffs=[.5]*len(new_ase_struct),
skin=0.09, self_interaction=False,
bothways=True)
nl.update(new_ase_struct)
indices, offsets = nl.get_neighbors(-1)
indices = np.delete(indices, np.where(indices==k))
if len(indices) == 0:
break
elif c_step > 15:
logfile.write('programm needs more than 15 corrector steps for',
' H atom {} at atom {}'.format(n_atoms, k))
sys.exit(15)
break
logfile.write('too close atoms'.join(str(indices)))
c_step += 1
logfile.write('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k))
r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0)
corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close))
corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step)
new_r_H = r_k + bond_length * corr_pos
#correct the H position to new_r_H in new_ase_struct
trans = np.zeros([n_atoms,3])
trans[-1] = new_r_H - r_H
new_ase_struct.translate(trans)
i = np.append(i,k) # manually update numbered neighbour lists
j = np.append(j,len(new_ase_struct)-1)
# update pmd topology
bondingPartner = originalAtoms[k] # here we need the original numbering,
# as ParmEd alters indices when adding atoms to the structure
nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name
logfile.write('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2]))
new_H = pmd.Atom(name=nameH, type='H', atomic_number=1)
new_H.xx = r0[0]
new_H.xy = r0[1]
new_H.xz = r0[2]
# Apparently we need the Bond object in order to update topology
new_Bond = pmd.Bond(bondingPartner,new_H)
new_H.bond_to(bondingPartner) # not sure, whether this is necessary
new_pmd_top.bonds.append(new_Bond)
new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue)
originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list
return new_ase_struct, new_pmd_top
def read_input_files(input_dir='../0_initial_structure'):
"""Search for and read input files (with implicit H-atoms)."""
ase_struct, pmd_top = None, None
files = os.listdir(input_dir)
for filename in files:
if ('snapshot' in filename) and ('.pdb' in filename):
ase_struct = ase.io.read(os.path.join(input_dir, filename))
pmd_struct = pmd.load_file(os.path.join(input_dir, filename))
elif '.top' in filename:
pmd_top = gromacs.GromacsTopologyFile(os.path.join(input_dir, filename),
parametrize=False)
# Make sure we actually found everything we need
if ase_struct is None:
raise RuntimeError('structure file (.pdb) not found in {}'.format(input_dir))
if pmd_top is None:
raise RuntimeError('topology file (.top) not found in {}'.format(input_dir))
return ase_struct, pmd_struct, pmd_top
def main():
"""Execute everything."""
# Read the united-atoms files extracted from the MD-simulation trajectory
# throws some warnings on angle types, does not matter for bonding info
with warnings.catch_warnings():
warnings.simplefilter('ignore')
ase_struct, pmd_struct, pmd_top = read_input_files()
# define a dictionary, marking how many H atoms are missing at which bonding partner explicitly
implicitHbondingPartners={'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2}
pmd_top.strip(':SOL,CL') # strip water and electrolyte from system
pmd_top.box = pmd_struct.box # Needed because .prmtop contains box info
pmd_top.positions = pmd_struct.positions
# Insert the explicit hydrogens
print('Inserting explicit hydrogens, please wait ...')
with open('insert_H.log', 'w') as logfile:
new_ase_struct, new_pmd_top = insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,1.0, logfile)
# Write output
new_ase_struct.write('ase_pdbH.pdb')
new_ase_struct.write('ase_pdbH.traj')
# Write other output
new_pmd_top.write_pdb('pmd_pdbH.pdb')
test_pmd = pmd.load_file('pmd_pdbH.pdb')
# some topology format, un functionality similar to GROMACS' .top, but readable by VMD
# new_pmd_top.write_psf('pmd_pdbH.psf')
print('Done.')
if __name__ == '__main__':
main()
Functions
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None)
-
Inserts explicit hydrogen atoms into ASE structure.
Source code
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None): """Inserts explicit hydrogen atoms into ASE structure.""" # make copies of passed structures as not to alter originals: new_pmd_top = pmd_top.copy(pmd.Structure) new_ase_struct = ase_struct.copy() # make copied atoms accessible by unchangable indices (standard list) originalAtoms = [a for a in new_pmd_top.atoms] implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k]) for a in pmd_top.atoms for k in implicitHbondingPartners.keys() if a.name == k] implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples) # build numbered neighbour list "manually" #i: list of atoms e.g. [0,0,0,1,1,2,2,...] i = np.array([b.atom1.idx for b in new_pmd_top.bonds]) #j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc. j = np.array([b.atom2.idx for b in new_pmd_top.bonds]) r = new_ase_struct.positions for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to logfile.write('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k)) for h in range(0,Hno): r = new_ase_struct.positions bondingPartners = j[i==k] logfile.write('bondingPartners'.join(str(bondingPartners))) partnerStr='' for p in bondingPartners: if partnerStr == '': partnerStr = originalAtoms[p].name else: partnerStr += ', ' + originalAtoms[p].name logfile.write('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr)) dr = (r[j[i==k]] - r[k]).mean(axis=0) # my understanding: dr is vector # from atom k's position towards the geometrical center of mass # it forms with its defined neighbours # r0 is a vector offset into the opposit direction: dr = dr/np.linalg.norm(dr) #normalized vector in direction dr #calculate an orthogonal vector 'dr_ortho' on dr #and push the H atoms in dr+dr_ortho and dr-dr_ortho dr_ortho = np.cross(dr,np.array([1,0,0])) if np.linalg.norm(dr_ortho) < 0.1 : #if dr and (1,0,0) have almost the same direction dr_ortho = np.cross(dr,np.array([0,1,0])) # (1-2*h) = {1,-1} for h={0,1} h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho) r0 = r[k] - bond_length*h_pos_vec new_ase_struct += ase.Atom('H', r0) # add atom in ase structure n_atoms = len(new_ase_struct) #introduce a corrector step for a added atom which is too close to others #do as many corrector stepps until all atoms are more than 1\AA appart c_step = 0 while True: nl = NeighborList(cutoffs=[.5]*len(new_ase_struct), skin=0.09, self_interaction=False, bothways=True) nl.update(new_ase_struct) indices, offsets = nl.get_neighbors(-1) indices = np.delete(indices, np.where(indices==k)) if len(indices) == 0: break elif c_step > 15: logfile.write('programm needs more than 15 corrector steps for', ' H atom {} at atom {}'.format(n_atoms, k)) sys.exit(15) break logfile.write('too close atoms'.join(str(indices))) c_step += 1 logfile.write('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k)) r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0) corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close)) corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step) new_r_H = r_k + bond_length * corr_pos #correct the H position to new_r_H in new_ase_struct trans = np.zeros([n_atoms,3]) trans[-1] = new_r_H - r_H new_ase_struct.translate(trans) i = np.append(i,k) # manually update numbered neighbour lists j = np.append(j,len(new_ase_struct)-1) # update pmd topology bondingPartner = originalAtoms[k] # here we need the original numbering, # as ParmEd alters indices when adding atoms to the structure nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name logfile.write('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2])) new_H = pmd.Atom(name=nameH, type='H', atomic_number=1) new_H.xx = r0[0] new_H.xy = r0[1] new_H.xz = r0[2] # Apparently we need the Bond object in order to update topology new_Bond = pmd.Bond(bondingPartner,new_H) new_H.bond_to(bondingPartner) # not sure, whether this is necessary new_pmd_top.bonds.append(new_Bond) new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue) originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list return new_ase_struct, new_pmd_top
def main()
-
Execute everything.
Source code
def main(): """Execute everything.""" # Read the united-atoms files extracted from the MD-simulation trajectory # throws some warnings on angle types, does not matter for bonding info with warnings.catch_warnings(): warnings.simplefilter('ignore') ase_struct, pmd_struct, pmd_top = read_input_files() # define a dictionary, marking how many H atoms are missing at which bonding partner explicitly implicitHbondingPartners={'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2} pmd_top.strip(':SOL,CL') # strip water and electrolyte from system pmd_top.box = pmd_struct.box # Needed because .prmtop contains box info pmd_top.positions = pmd_struct.positions # Insert the explicit hydrogens print('Inserting explicit hydrogens, please wait ...') with open('insert_H.log', 'w') as logfile: new_ase_struct, new_pmd_top = insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,1.0, logfile) # Write output new_ase_struct.write('ase_pdbH.pdb') new_ase_struct.write('ase_pdbH.traj') # Write other output new_pmd_top.write_pdb('pmd_pdbH.pdb') test_pmd = pmd.load_file('pmd_pdbH.pdb') # some topology format, un functionality similar to GROMACS' .top, but readable by VMD # new_pmd_top.write_psf('pmd_pdbH.psf') print('Done.')
def read_input_files(input_dir='../0_initial_structure')
-
Search for and read input files (with implicit H-atoms).
Source code
def read_input_files(input_dir='../0_initial_structure'): """Search for and read input files (with implicit H-atoms).""" ase_struct, pmd_top = None, None files = os.listdir(input_dir) for filename in files: if ('snapshot' in filename) and ('.pdb' in filename): ase_struct = ase.io.read(os.path.join(input_dir, filename)) pmd_struct = pmd.load_file(os.path.join(input_dir, filename)) elif '.top' in filename: pmd_top = gromacs.GromacsTopologyFile(os.path.join(input_dir, filename), parametrize=False) # Make sure we actually found everything we need if ase_struct is None: raise RuntimeError('structure file (.pdb) not found in {}'.format(input_dir)) if pmd_top is None: raise RuntimeError('topology file (.top) not found in {}'.format(input_dir)) return ase_struct, pmd_struct, pmd_top