smamp.insertHbyList
module
Source code
## jlh 2018/02/13
# takes dictionary of united atoms and implicit hydrogen number,
# adding a certain number of explicit hydrogens around these.
# Needs and returns corresponding ASE and ParmEd representations of the system
# Ugly and annoying:
# ASE identifies atoms only by number, no functionality to store atom name and residue
# New atoms are added at the end of the whole atom list
# Parmed inserts new atoms not at the end of the list, nut within their respective residues.
# Initially, ordering in ASE and Parmed are equal, but when structure is modified
# we have to track every change in order to be able to map Parmed structure to ase structure
# TODO: think about the case when one has to add more than two H-atoms
import numpy as np
import matscipy as msp
from matscipy.neighbours import neighbour_list
from ase.data import atomic_numbers
import ase.io
from ase.neighborlist import NeighborList
from ase.visualize import view
import parmed as pmd
from parmed import gromacs
import sys
import logging
#gromacs.GROMACS_TOPDIR = "/home/jotelha/gromacs/2016.4/share/gromacs/top"
# returns new_ase_struct and new_mpd_struct with hydrogens added
# additionally, two lists, "names" and "residues" are returned in order to make ASE atoms identifiable
def insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,bond_length=1.0,debug=False):
# make copies of passed structures as not to alter originals:
new_pmd_top = pmd_top.copy(pmd.Structure)
new_ase_struct = ase_struct.copy()
# names stores the String IDs of all atoms as to facilitate later ASE ID -> Atom name mapping
names = [ a.name for a in pmd_top.atoms ]
residues = [ a.residue.name for a in pmd_top.atoms ]
# make copied atoms accessible by unchangable indices (standard list)
originalAtoms = [a for a in new_pmd_top.atoms]
implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k])
for a in pmd_top.atoms
for k in implicitHbondingPartners.keys()
if a.name == k]
implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples)
# build numbered neighbour list "manually"
#i: list of atoms e.g. [0,0,0,1,1,2,2,...]
i = np.array([b.atom1.idx for b in new_pmd_top.bonds])
#j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc.
j = np.array([b.atom2.idx for b in new_pmd_top.bonds])
r = new_ase_struct.positions
for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to
logging.info('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k))
for h in range(0,Hno):
r = new_ase_struct.positions
bondingPartners = j[i==k]
logging.info('bondingPartners {}'.format(bondingPartners))
partnerStr=''
for p in bondingPartners:
if partnerStr == '':
partnerStr = originalAtoms[p].name
else:
partnerStr += ', ' + originalAtoms[p].name
logging.info('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr))
dr = (r[j[i==k]] - r[k]).mean(axis=0)
# my understanding: dr is vector
# from atom k's position towards the geometrical center of mass
# it forms with its defined neighbours
# r0 is a vector offset into the opposit direction:
dr = dr/np.linalg.norm(dr) #normalized vector in direction dr
#calculate an orthogonal vector 'dr_ortho' on dr
#and push the H atoms in dr+dr_ortho and dr-dr_ortho
#if one has to add more than two H atoms introduce dr_ortho_2 = dr x dr_ortho
dr_ortho = np.cross(dr,np.array([1,0,0]))
if np.linalg.norm(dr_ortho) < 0.1 : #if dr and (1,0,0) have almost the same direction
dr_ortho = np.cross(dr,np.array([0,1,0]))
# (1-2*h) = {1,-1} for h={0,1}
h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho)
r0 = r[k] - bond_length*h_pos_vec
new_ase_struct += ase.Atom('H', r0) # add atom in ase structure
n_atoms = len(new_ase_struct)
#introduce a corrector step for a added atom which is too close to others
#do as many corrector stepps until all atoms are more than 1\AA appart
c_step = 0
while True:
nl = NeighborList(cutoffs=[.5]*len(new_ase_struct),
skin=0.09, self_interaction=False,
bothways=True)
nl.update(new_ase_struct)
indices, offsets = nl.get_neighbors(-1)
indices = np.delete(indices, np.where(indices==k))
if len(indices) == 0:
break
elif c_step > 15:
logging.info('programm needs more than 15 corrector steps for H atom {} at atom {}'.format(n_atoms, k))
sys.exit(15)
break
logging.info('too close atoms {}'.format(indices))
c_step += 1
logging.info('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k))
# if indices not empty -> the atom(-1)=r_H is to close together
# with atom a_close=indices[0], it is a H-atom belonging to atom 'k'=r_k .
#correctorstep: corr_step = (r_H-a_close)/|(r_H-a_close)|
#corrected_pos: corr_pos = ((r_H-r_k) + corr_step)/|((r_H-r_k) + corr_step)|
#new H position: new_r_H = r_k + corr_pos
r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0)
#print('r_H, r_k, a_close', r_H, r_k, a_close)
corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close)) #maybe introduce here a skaling Faktor s=0.3 or somthing like that to make tiny corrections and don't overshoot.
corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step)
new_r_H = r_k + bond_length * corr_pos
#correct the H position to new_r_H in new_ase_struct
trans = np.zeros([n_atoms,3])
trans[-1] = new_r_H - r_H
new_ase_struct.translate(trans)
#view(new_ase_struct)
#sys.exit()
i = np.append(i,k) # manually update numbered neighbour lists
j = np.append(j,len(new_ase_struct)-1)
# update pmd topology
bondingPartner = originalAtoms[k] # here we need the original numbering,
# as ParmEd alters indices when adding atoms to the structure
nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name
logging.info('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2]))
new_H = pmd.Atom(name=nameH, type='H', atomic_number=1)
new_H.xx = r0[0] # ParmEd documentation not very helpful, did not find any more compact assignment
new_H.xy = r0[1]
new_H.xz = r0[2]
# do not understand ParmEd that well, apparently we need the Bond object in order to update topology
new_Bond = pmd.Bond(bondingPartner,new_H)
new_H.bond_to(bondingPartner) # not sure, whether this is necessary
new_pmd_top.bonds.append(new_Bond)
new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue)
originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list
names.append(nameH) # append name of H-atom
residues.append(bondingPartner.residue.name) # H is in same residue as bonding partner
return new_ase_struct, new_pmd_top, names, residues
Functions
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, debug=False)
-
Source code
def insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,bond_length=1.0,debug=False): # make copies of passed structures as not to alter originals: new_pmd_top = pmd_top.copy(pmd.Structure) new_ase_struct = ase_struct.copy() # names stores the String IDs of all atoms as to facilitate later ASE ID -> Atom name mapping names = [ a.name for a in pmd_top.atoms ] residues = [ a.residue.name for a in pmd_top.atoms ] # make copied atoms accessible by unchangable indices (standard list) originalAtoms = [a for a in new_pmd_top.atoms] implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k]) for a in pmd_top.atoms for k in implicitHbondingPartners.keys() if a.name == k] implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples) # build numbered neighbour list "manually" #i: list of atoms e.g. [0,0,0,1,1,2,2,...] i = np.array([b.atom1.idx for b in new_pmd_top.bonds]) #j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc. j = np.array([b.atom2.idx for b in new_pmd_top.bonds]) r = new_ase_struct.positions for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to logging.info('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k)) for h in range(0,Hno): r = new_ase_struct.positions bondingPartners = j[i==k] logging.info('bondingPartners {}'.format(bondingPartners)) partnerStr='' for p in bondingPartners: if partnerStr == '': partnerStr = originalAtoms[p].name else: partnerStr += ', ' + originalAtoms[p].name logging.info('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr)) dr = (r[j[i==k]] - r[k]).mean(axis=0) # my understanding: dr is vector # from atom k's position towards the geometrical center of mass # it forms with its defined neighbours # r0 is a vector offset into the opposit direction: dr = dr/np.linalg.norm(dr) #normalized vector in direction dr #calculate an orthogonal vector 'dr_ortho' on dr #and push the H atoms in dr+dr_ortho and dr-dr_ortho #if one has to add more than two H atoms introduce dr_ortho_2 = dr x dr_ortho dr_ortho = np.cross(dr,np.array([1,0,0])) if np.linalg.norm(dr_ortho) < 0.1 : #if dr and (1,0,0) have almost the same direction dr_ortho = np.cross(dr,np.array([0,1,0])) # (1-2*h) = {1,-1} for h={0,1} h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho) r0 = r[k] - bond_length*h_pos_vec new_ase_struct += ase.Atom('H', r0) # add atom in ase structure n_atoms = len(new_ase_struct) #introduce a corrector step for a added atom which is too close to others #do as many corrector stepps until all atoms are more than 1\AA appart c_step = 0 while True: nl = NeighborList(cutoffs=[.5]*len(new_ase_struct), skin=0.09, self_interaction=False, bothways=True) nl.update(new_ase_struct) indices, offsets = nl.get_neighbors(-1) indices = np.delete(indices, np.where(indices==k)) if len(indices) == 0: break elif c_step > 15: logging.info('programm needs more than 15 corrector steps for H atom {} at atom {}'.format(n_atoms, k)) sys.exit(15) break logging.info('too close atoms {}'.format(indices)) c_step += 1 logging.info('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k)) # if indices not empty -> the atom(-1)=r_H is to close together # with atom a_close=indices[0], it is a H-atom belonging to atom 'k'=r_k . #correctorstep: corr_step = (r_H-a_close)/|(r_H-a_close)| #corrected_pos: corr_pos = ((r_H-r_k) + corr_step)/|((r_H-r_k) + corr_step)| #new H position: new_r_H = r_k + corr_pos r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0) #print('r_H, r_k, a_close', r_H, r_k, a_close) corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close)) #maybe introduce here a skaling Faktor s=0.3 or somthing like that to make tiny corrections and don't overshoot. corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step) new_r_H = r_k + bond_length * corr_pos #correct the H position to new_r_H in new_ase_struct trans = np.zeros([n_atoms,3]) trans[-1] = new_r_H - r_H new_ase_struct.translate(trans) #view(new_ase_struct) #sys.exit() i = np.append(i,k) # manually update numbered neighbour lists j = np.append(j,len(new_ase_struct)-1) # update pmd topology bondingPartner = originalAtoms[k] # here we need the original numbering, # as ParmEd alters indices when adding atoms to the structure nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name logging.info('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2])) new_H = pmd.Atom(name=nameH, type='H', atomic_number=1) new_H.xx = r0[0] # ParmEd documentation not very helpful, did not find any more compact assignment new_H.xy = r0[1] new_H.xz = r0[2] # do not understand ParmEd that well, apparently we need the Bond object in order to update topology new_Bond = pmd.Bond(bondingPartner,new_H) new_H.bond_to(bondingPartner) # not sure, whether this is necessary new_pmd_top.bonds.append(new_Bond) new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue) originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list names.append(nameH) # append name of H-atom residues.append(bondingPartner.residue.name) # H is in same residue as bonding partner return new_ase_struct, new_pmd_top, names, residues