smamp.convert_UA_to_AA module
- Change structure with implicit Hydrogen to one with explicitely defined H-atoms.
 - Copyright 2019 Simulation Lab
 - University of Freiburg
 Author:JohannesHoermannjohannes.hoermann@imtek.uni-freiburg.deModified:LukasElfleinelfleinl@cs.uni-freiburg.de
Source code
""" Change structure with implicit Hydrogen to one with explicitely defined H-atoms.
Copyright 2019 Simulation Lab
University of Freiburg
Author: Johannes Hoermann <johannes.hoermann@imtek.uni-freiburg.de>
Modified: Lukas Elflein <elfleinl@cs.uni-freiburg.de>
"""
import os
import ase.io
import sys
import warnings
import numpy as np
import parmed as pmd
from ase.data import atomic_numbers
from ase.neighborlist import NeighborList
from matscipy.neighbours import neighbour_list
from parmed import gromacs
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None):
    """Inserts explicit hydrogen atoms into ASE structure.""" 
    # make copies of passed structures as not to alter originals:
    new_pmd_top = pmd_top.copy(pmd.Structure)
    new_ase_struct = ase_struct.copy()
    # make copied atoms accessible by unchangable indices (standard list)
    originalAtoms = [a for a in new_pmd_top.atoms]
    implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k])
                                            for a in pmd_top.atoms
                                            for k in implicitHbondingPartners.keys()
                                            if a.name == k]
    implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples)
    # build numbered neighbour list "manually"
    #i: list of atoms e.g. [0,0,0,1,1,2,2,...]
    i = np.array([b.atom1.idx for b in new_pmd_top.bonds])
    #j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc.
    j = np.array([b.atom2.idx for b in new_pmd_top.bonds])
    r = new_ase_struct.positions
    for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to
        logfile.write('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k))
        for h in range(0,Hno):
            r = new_ase_struct.positions
            bondingPartners = j[i==k]
            logfile.write('bondingPartners'.join(str(bondingPartners)))
            partnerStr=''
            for p in bondingPartners:
                if partnerStr == '':
                    partnerStr = originalAtoms[p].name
                else:
                    partnerStr += ', ' + originalAtoms[p].name
            logfile.write('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr))
            dr = (r[j[i==k]] - r[k]).mean(axis=0)
            # my understanding: dr is vector
            # from atom k's position towards the geometrical center of mass
            # it forms with its defined neighbours
            # r0 is a vector offset into the opposit direction:
            dr = dr/np.linalg.norm(dr) #normalized vector in direction dr
            #calculate an orthogonal vector 'dr_ortho' on dr
            #and push the H atoms in dr+dr_ortho and dr-dr_ortho
            dr_ortho = np.cross(dr,np.array([1,0,0]))
            if np.linalg.norm(dr_ortho) < 0.1 :   #if dr and (1,0,0) have almost the same direction
                dr_ortho = np.cross(dr,np.array([0,1,0]))
            # (1-2*h) = {1,-1} for h={0,1}
            h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho)
            r0 = r[k] - bond_length*h_pos_vec
            new_ase_struct += ase.Atom('H', r0) # add atom in ase structure
            n_atoms = len(new_ase_struct)
            #introduce a corrector step for a added atom which is too close to others
            #do as many corrector stepps until all atoms are more than 1\AA appart
            c_step = 0
            while True:
                nl = NeighborList(cutoffs=[.5]*len(new_ase_struct),
                                  skin=0.09, self_interaction=False,
                                  bothways=True)
                nl.update(new_ase_struct)
                indices, offsets = nl.get_neighbors(-1)
                indices = np.delete(indices, np.where(indices==k))
                if len(indices) == 0:
                    break
                elif c_step > 15:
                    logfile.write('programm needs more than 15 corrector steps for',
                          ' H atom {} at atom {}'.format(n_atoms, k))
                    sys.exit(15)
                    break
                logfile.write('too close atoms'.join(str(indices)))
                c_step += 1
                logfile.write('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k))
                r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0)
                corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close)) 
                corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step)
                new_r_H = r_k + bond_length * corr_pos
                #correct the H position to new_r_H in new_ase_struct
                trans = np.zeros([n_atoms,3])
                trans[-1] = new_r_H - r_H
                new_ase_struct.translate(trans)
            i = np.append(i,k) # manually update numbered neighbour lists
            j = np.append(j,len(new_ase_struct)-1)
            # update pmd topology
            bondingPartner = originalAtoms[k] # here we need the original numbering,
            # as ParmEd alters indices when adding atoms to the structure
            nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name
            logfile.write('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2]))
            new_H = pmd.Atom(name=nameH, type='H', atomic_number=1)
            new_H.xx = r0[0] 
            new_H.xy = r0[1]
            new_H.xz = r0[2]
            # Apparently we need the Bond object in order to update topology
            new_Bond = pmd.Bond(bondingPartner,new_H)
            new_H.bond_to(bondingPartner) # not sure, whether this is necessary
            new_pmd_top.bonds.append(new_Bond)
            new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue)
            originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list
    return new_ase_struct, new_pmd_top
def read_input_files(input_dir='../0_initial_structure'):
    """Search for and read input files (with implicit H-atoms)."""
    ase_struct, pmd_top = None, None
    files = os.listdir(input_dir)
    for filename in files:
        if ('snapshot' in filename) and ('.pdb' in filename):
            ase_struct = ase.io.read(os.path.join(input_dir, filename))
            pmd_struct = pmd.load_file(os.path.join(input_dir, filename))
        elif '.top' in filename:
            pmd_top = gromacs.GromacsTopologyFile(os.path.join(input_dir, filename), 
                                                      parametrize=False)
    # Make sure we actually found everything we need
    if ase_struct is None:
        raise RuntimeError('structure file (.pdb) not found in {}'.format(input_dir))
    if pmd_top is None:
        raise RuntimeError('topology file (.top) not found in {}'.format(input_dir))
    return ase_struct, pmd_struct, pmd_top
def main():
    """Execute everything."""
    # Read the united-atoms files extracted from the MD-simulation trajectory
    # throws some warnings on angle types, does not matter for bonding info
    with warnings.catch_warnings():
         warnings.simplefilter('ignore')
         ase_struct, pmd_struct, pmd_top = read_input_files()
    # define a dictionary, marking how many H atoms are missing at which bonding partner explicitly
    implicitHbondingPartners={'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2}
    
    pmd_top.strip(':SOL,CL') # strip water and electrolyte from system
    pmd_top.box = pmd_struct.box # Needed because .prmtop contains box info
    pmd_top.positions = pmd_struct.positions
    # Insert the explicit hydrogens
    print('Inserting explicit hydrogens, please wait ...')
    with open('insert_H.log', 'w') as logfile:
       new_ase_struct, new_pmd_top = insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,1.0, logfile)
    # Write output
    new_ase_struct.write('ase_pdbH.pdb')
    new_ase_struct.write('ase_pdbH.traj')
    # Write other output
    new_pmd_top.write_pdb('pmd_pdbH.pdb')
    test_pmd = pmd.load_file('pmd_pdbH.pdb')
    # some topology format, un functionality similar to GROMACS' .top, but readable by VMD
    # new_pmd_top.write_psf('pmd_pdbH.psf')
    print('Done.')
if __name__ == '__main__':
    main()
Functions
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None)- 
Inserts explicit hydrogen atoms into ASE structure.
Source code
def insertHbyList(ase_struct, pmd_top, implicitHbondingPartners, bond_length=1.0, logfile=None): """Inserts explicit hydrogen atoms into ASE structure.""" # make copies of passed structures as not to alter originals: new_pmd_top = pmd_top.copy(pmd.Structure) new_ase_struct = ase_struct.copy() # make copied atoms accessible by unchangable indices (standard list) originalAtoms = [a for a in new_pmd_top.atoms] implicitHbondingPartnersIdxHnoTuples = [(a.idx,implicitHbondingPartners[k]) for a in pmd_top.atoms for k in implicitHbondingPartners.keys() if a.name == k] implicitHbondingPartnersIdxHnoDict = dict(implicitHbondingPartnersIdxHnoTuples) # build numbered neighbour list "manually" #i: list of atoms e.g. [0,0,0,1,1,2,2,...] i = np.array([b.atom1.idx for b in new_pmd_top.bonds]) #j: list of bonding partners corresponding to i [1,2,3,...] ==> 0 has bondpartners 1,2 and 3; etc. j = np.array([b.atom2.idx for b in new_pmd_top.bonds]) r = new_ase_struct.positions for k,Hno in implicitHbondingPartnersIdxHnoDict.items(): # for all atoms to append hydrogen to logfile.write('Adding {} H-atoms to {} (#{})...'.format(Hno,originalAtoms[k].name,k)) for h in range(0,Hno): r = new_ase_struct.positions bondingPartners = j[i==k] logfile.write('bondingPartners'.join(str(bondingPartners))) partnerStr='' for p in bondingPartners: if partnerStr == '': partnerStr = originalAtoms[p].name else: partnerStr += ', ' + originalAtoms[p].name logfile.write('Atom {} already has bonding partners {}'.format(originalAtoms[k].name,partnerStr)) dr = (r[j[i==k]] - r[k]).mean(axis=0) # my understanding: dr is vector # from atom k's position towards the geometrical center of mass # it forms with its defined neighbours # r0 is a vector offset into the opposit direction: dr = dr/np.linalg.norm(dr) #normalized vector in direction dr #calculate an orthogonal vector 'dr_ortho' on dr #and push the H atoms in dr+dr_ortho and dr-dr_ortho dr_ortho = np.cross(dr,np.array([1,0,0])) if np.linalg.norm(dr_ortho) < 0.1 : #if dr and (1,0,0) have almost the same direction dr_ortho = np.cross(dr,np.array([0,1,0])) # (1-2*h) = {1,-1} for h={0,1} h_pos_vec = (dr+(1-2*h)*dr_ortho)/np.linalg.norm(dr+(1-2*h)*dr_ortho) r0 = r[k] - bond_length*h_pos_vec new_ase_struct += ase.Atom('H', r0) # add atom in ase structure n_atoms = len(new_ase_struct) #introduce a corrector step for a added atom which is too close to others #do as many corrector stepps until all atoms are more than 1\AA appart c_step = 0 while True: nl = NeighborList(cutoffs=[.5]*len(new_ase_struct), skin=0.09, self_interaction=False, bothways=True) nl.update(new_ase_struct) indices, offsets = nl.get_neighbors(-1) indices = np.delete(indices, np.where(indices==k)) if len(indices) == 0: break elif c_step > 15: logfile.write('programm needs more than 15 corrector steps for', ' H atom {} at atom {}'.format(n_atoms, k)) sys.exit(15) break logfile.write('too close atoms'.join(str(indices))) c_step += 1 logfile.write('correcter step {} for H {} at atom {}'.format(c_step, n_atoms-1, k)) r_H, r_k, a_close = np.take(new_ase_struct.get_positions(),[-1,k,indices[0]], axis=0) corr_step = (r_H-a_close)/np.linalg.norm((r_H-a_close)) corr_pos = ((r_H-r_k) + corr_step)/np.linalg.norm((r_H-r_k) + corr_step) new_r_H = r_k + bond_length * corr_pos #correct the H position to new_r_H in new_ase_struct trans = np.zeros([n_atoms,3]) trans[-1] = new_r_H - r_H new_ase_struct.translate(trans) i = np.append(i,k) # manually update numbered neighbour lists j = np.append(j,len(new_ase_struct)-1) # update pmd topology bondingPartner = originalAtoms[k] # here we need the original numbering, # as ParmEd alters indices when adding atoms to the structure nameH = '{}{}'.format(h+1,bondingPartner.name) # atom needs a unique name logfile.write('Adding H-atom {} at position [ {}, {}, {} ]'.format(nameH,r0[0], r0[1], r0[2])) new_H = pmd.Atom(name=nameH, type='H', atomic_number=1) new_H.xx = r0[0] new_H.xy = r0[1] new_H.xz = r0[2] # Apparently we need the Bond object in order to update topology new_Bond = pmd.Bond(bondingPartner,new_H) new_H.bond_to(bondingPartner) # not sure, whether this is necessary new_pmd_top.bonds.append(new_Bond) new_pmd_top.add_atom_to_residue(new_H,bondingPartner.residue) originalAtoms.append(new_H) # add atom to the bottom of "index-stiff" list return new_ase_struct, new_pmd_top def main()- 
Execute everything.
Source code
def main(): """Execute everything.""" # Read the united-atoms files extracted from the MD-simulation trajectory # throws some warnings on angle types, does not matter for bonding info with warnings.catch_warnings(): warnings.simplefilter('ignore') ase_struct, pmd_struct, pmd_top = read_input_files() # define a dictionary, marking how many H atoms are missing at which bonding partner explicitly implicitHbondingPartners={'CD4':1,'CD3':1,'CA2':2,'CA3':2,'CB2':2,'CB3':2} pmd_top.strip(':SOL,CL') # strip water and electrolyte from system pmd_top.box = pmd_struct.box # Needed because .prmtop contains box info pmd_top.positions = pmd_struct.positions # Insert the explicit hydrogens print('Inserting explicit hydrogens, please wait ...') with open('insert_H.log', 'w') as logfile: new_ase_struct, new_pmd_top = insertHbyList(ase_struct,pmd_top,implicitHbondingPartners,1.0, logfile) # Write output new_ase_struct.write('ase_pdbH.pdb') new_ase_struct.write('ase_pdbH.traj') # Write other output new_pmd_top.write_pdb('pmd_pdbH.pdb') test_pmd = pmd.load_file('pmd_pdbH.pdb') # some topology format, un functionality similar to GROMACS' .top, but readable by VMD # new_pmd_top.write_psf('pmd_pdbH.psf') print('Done.') def read_input_files(input_dir='../0_initial_structure')- 
Search for and read input files (with implicit H-atoms).
Source code
def read_input_files(input_dir='../0_initial_structure'): """Search for and read input files (with implicit H-atoms).""" ase_struct, pmd_top = None, None files = os.listdir(input_dir) for filename in files: if ('snapshot' in filename) and ('.pdb' in filename): ase_struct = ase.io.read(os.path.join(input_dir, filename)) pmd_struct = pmd.load_file(os.path.join(input_dir, filename)) elif '.top' in filename: pmd_top = gromacs.GromacsTopologyFile(os.path.join(input_dir, filename), parametrize=False) # Make sure we actually found everything we need if ase_struct is None: raise RuntimeError('structure file (.pdb) not found in {}'.format(input_dir)) if pmd_top is None: raise RuntimeError('topology file (.top) not found in {}'.format(input_dir)) return ase_struct, pmd_struct, pmd_top