Source code for af2rave.simulation.utils

import openmm.app as app
import openmm.unit as unit
import mdtraj as md
import numpy as np
from openmm.unit import angstrom, molar

import pdbfixer
from numbers import Integral

from . import Charmm36mFF

AtomIndexLike = int | set[int] | tuple[int] | list[int] | list[set[int]] | list[tuple[int]]
TopologyLike = app.Topology | md.Topology | str


[docs] class TopologyMap: ''' Create a mapping between the old and new topology. :param old_top: old topology, either a path or an OpenMM or MDTraj topology object :type old_top: openmm.app.Topology or mdtraj.Topology or str :param new_top: new topology, either a path or an OpenMM or MDTraj topology object :type new_top: openmm.app.Topology or mdtraj.Topology or str :param resid_offset: Offset to be added to the residue id to align two topologies. Default is 0. For example, if a residue in the old topology has resid 1 and in the new topology has resid 101, then resid_offset = 100. ''' def __init__(self, old_top: TopologyLike, new_top: TopologyLike, resid_offset = 0): self._old_top = self._load_topology(old_top) self._new_top = self._load_topology(new_top) self._atom_index_map = self._generate_mapping_table(resid_offset) def _load_topology(self, top: TopologyLike) -> app.Topology: ''' Load the topology from a file or a topology object. :param top: topology, either a path or an OpenMM or MDTraj topology object :type top: openmm.app.Topology or mdtraj.Topology or str ''' if isinstance(top, str): return app.PDBFile(top).topology elif isinstance(top, md.Topology): return top.to_openmm() elif isinstance(top, app.Topology): return top else: raise ValueError("top must be a path to a PDB file or an OpenMM or MDTraj topology object.") def _generate_mapping_table(self, resid_offset: int) -> dict[int, int]: index_lookup = {} forward_map = {} # Note that xxx.index counts from 0 and xxx.id counts from 1 for a in self._old_top.atoms(): # retrieve information for the old map index = a.index name = a.name resid = int(a.residue.id) chain = a.residue.chain.id index_lookup[(chain, resid, name)] = index # now looks up this information in the new topology for a in self._new_top.atoms(): index = a.index name = a.name resid = int(a.residue.id) chain = a.residue.chain.id try: old_index = index_lookup[(chain, resid-resid_offset, name)] forward_map[old_index] = index except KeyError: pass return forward_map
[docs] def map_atom_index(self, index: AtomIndexLike) -> AtomIndexLike: ''' Map atom index (0-based) from input PDB to the output PDB file. After adding hydrogen, the atom index will be changed. This function translates the old atom index to the new atom index. :param index: atom index or a list of atom indices :type index: AtomIndexLike: int or set[int] or list[int] or list[set[int]] ''' try: if isinstance(index, np.ndarray): return np.vectorize(self.map_atom_index)(index) elif isinstance(index, (set, tuple, list)): return type(index)(map(self.map_atom_index, index)) elif isinstance(index, Integral): return self._atom_index_map[int(index)] else: raise TypeError("Unrecognized type for index.") except KeyError as e: atom_missing = list(self._old_top.atoms())[int(e.args[0])] raise ValueError(f"{atom_missing} in the new topology does " "not exist in the old topology.")
[docs] class SimulationBox: ''' Create a simulation box from a protein PDB box. :param filename: path to the pdb file :type filename: str :param forcefield: forcefield to be used for adding hydrogens :type forcefield: OpenMM.app.ForceField ''' def __init__(self, filename: str, forcefield: app.ForceField = Charmm36mFF): self._filename = filename self._forcefield = forcefield # fixer instance with open(self._filename, 'r') as ifs: fixer = pdbfixer.PDBFixer(pdbfile=ifs) # finding and adding missing residues including terminals fixer.findNonstandardResidues() fixer.replaceNonstandardResidues() fixer.findMissingResidues() fixer.findMissingAtoms() fixer.addMissingAtoms(seed=0) ''' PDB fixer automatically add disulfide bonds when two cys-S are close. First, this may not always be the case. Second, this behavior is not verbose in AMBER ffs as they actually have the CYM residue for disulfide- bonded CYS. Meanwhile, CHARMM ffs do not have CYM and modeller will complain. We will remove the disulfide bond here. ''' modeller = app.Modeller(fixer.topology, fixer.positions) ds_bonds = [] for bond in modeller.topology.bonds(): if bond.atom1.name == 'SG' and bond.atom2.name == 'SG': ds_bonds.append(bond) modeller.delete(ds_bonds) self.top = modeller.topology self.pos = modeller.positions
[docs] def add_disulfide_bond(self, resid: list[tuple[int, int]]): ''' By default the simulation box after create_box() does not have any disulfide bonds. This method will add back the bonds when necessary. This method currently only works with AMBER forcefields. :param resid: list of tuples of CYS residues that are bonded :type resid: list[tuple[int, int]] ''' # create modeller instance modeller = app.Modeller(self.top, self.pos) # add disulfide bonds for i, j in resid: residues = {int(r.index): r for r in modeller.topology.residues()} residue_i, residue_j = residues[i], residues[j] if residue_i.name != "CYS" or residue_j.name != "CYS": raise ValueError(f"Residues {i}({residue_i.name}) {j}({residue_j.name}) both need to be cysteines.") atoms_i = {atom.name: atom for atom in residue_i.atoms()} atoms_j = {atom.name: atom for atom in residue_j.atoms()} if "SG" in atoms_i and "SG" in atoms_j: modeller.topology.addBond(atoms_i["SG"], atoms_j["SG"]) # residue_i.name, residue_j.name = 'CYX', 'CYX' else: if "SG" not in atoms_i: raise ValueError(f"Residues {i} do not have an SG atom.") else: raise ValueError(f"Residues {j} do not have an SG atom.") to_delete = [] if "HG" in atoms_i: to_delete.append(atoms_i["HG"]) if "HG" in atoms_j: to_delete.append(atoms_j["HG"]) modeller.delete(to_delete) self.top = modeller.topology self.pos = modeller.positions
[docs] def create_box(self, **kwargs) -> tuple[list, app.Topology]: """ Generate the simulation box from a raw pdb file. Currently only soluble proteins are supported as we can only add water. This function performs the following tasks: 1. use pdbfixer to add missing atoms, residues, and terminals 2. add hydrogen, at the given pH 3. solvate the system with water, neutralize the system by adding ions :param float pH: float: pH of the system. Default is 7.0 :param float padding: padding around the protein. Default is 10. Unit: Angstrom. :param str water_model: water model to be used. Default is 'tip3p' :param str positiveIon: positive ion used to neutralize the system. Default is 'Na+'. :param str negativeIon: negative ion used to neutralize the system. Default is 'Cl-' :param float ionicStrength: ionic strength of the system. Default is 0.0. Unit: molar """ # create modeller instance modeller = app.Modeller(self.top, self.pos) # add hydrogens self.pH = kwargs.get('pH', 7.0) modeller.addHydrogens(self._forcefield, pH=self.pH) # add solvent self.padding = kwargs.get('padding', 10 * angstrom) self.water_model = kwargs.get('water_model', 'tip3p') self.positive_ion = kwargs.get('positiveIon', 'Na+') self.negative_ion = kwargs.get('negativeIon', 'Cl-') self.ionic_strength = kwargs.get('ionicStrength', 0.0 * molar) if not unit.is_quantity(self.padding): self.padding *= angstrom if not unit.is_quantity(self.ionic_strength): self.ionic_strength *= molar modeller.addSolvent(self._forcefield, padding=self.padding, model=self.water_model, neutralize=True, positiveIon=self.positive_ion, negativeIon=self.negative_ion, ionicStrength=self.ionic_strength ) self.top = modeller.topology self.pos = modeller.positions # initialize a mapping object self.top_map = TopologyMap(app.PDBFile(self._filename).topology, self.top) self._map_atom_index = self.top_map.map_atom_index return modeller.positions, modeller.topology
@property def map_atom_index(self): ''' Map atom index from input PDB to the output PDB file. This is a callable property. After adding hydrogen, the atom index will be changed. This function translates the old atom index to the new atom index. Example: .. code-block:: python box = SimulationBox(filename) box.create_box() box.map_atom_index(1) :param index: atom index or a list of atom indices :type index: AtomIndexLike: int or set[int] or list[int] or list[set[int]] ''' return self._map_atom_index
[docs] def save_pdb(self, filename: str): """ Write the simulation box to a pdb file. :param str filename: path to the output pdb file """ with open(filename, 'w') as f: app.PDBFile.writeFile(self.top, self.pos, f, keepIds=True)
def __str__(self): return (f"SimulationBox('{self._filename}') at pH {self.pH}. " f"Solvated: {self.padding} {self.water_model} water " f"with {self.ionic_strength} {self.positive_ion}{self.negative_ion} ions.")