Skip to content

Commit

Permalink
Minor patch
Browse files Browse the repository at this point in the history
  • Loading branch information
JLittlef committed Mar 7, 2024
1 parent 56324fa commit bc9db61
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 82 deletions.
74 changes: 64 additions & 10 deletions bin/mutant_maker.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,70 @@
#!/usr/bin/env python3
"""mutant_maker
Fix PDB files, adding missing residues, atoms and hydrogens, then create mutations in desired locations
Usage:
mutant_maker.py [--incsv=<incsv>] [--from-col=<col>] [--in-pdb=<pdb>] [--chain=<chain>] [--pH=<pH>]
Options:
--incsv=<incsv> Read data from csv file
--from-col=<col> Select column title containing mutations
--in-pdb=<pdb> PDB file of protein wildtype
--chain=<chain> Select chain upon which to perform mutation
--pH=<ph> Set pH of the protein
"""

from docopt import docopt
import pandas as pd
import re
from openmm.app import PDBFile
import sys
from sys import stdout
from pdbfixer import PDBFixer

pdb = PDBFixer(sys.argv[1])
pdb.addMutations(mutationChain='A', mutationResidues='19', newResidues='ALA')
pdb.findMissingResidues()
pdb.findNonstandardResidues()
pdb.replaceNonstandardResidues()
pdb.removeHeterogens(True)
pdb.findMissingAtoms()
pdb.addMissingAtoms()
pdb.addMissingHydrogens(7.0)
PDBFile.writeFile(pdb.topology, pdb.positions, open('modified_protein.pdb', 'w'))
def get_data(csvname: str, col: str):
data = pd.read_csv(csvname)
clipped_data=pd.DataFrame(data=data, columns=[col])
divided_data = clipped_data.Mutation.str.extract(r'([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)')
divided_cleaned = divided_data.to_string(header=None, index=False)
amino_index = {'G': 'GLY' , 'L': 'LEU', 'R': 'ARG', 'A': 'ALA', 'Y': 'TYR', 'S': 'SER', 'M': 'MET', 'I': 'ILE', 'T': 'THR', 'C': 'CYS', 'V': 'VAL', 'P': 'PRO', 'F': 'PHE', 'W': 'TRP', 'H': 'HIS', 'K': 'LYS', 'D': 'ASP', 'E': 'GLU', 'N': 'ASN', 'Q': 'GLN', ' ': '-'}
new_rep_codes = re.sub(r"[GLRAYSMITCVPFWHKDENQ ]", lambda x: amino_index[x.group(0)], divided_cleaned)
new_rep_cleaned = re.sub("--","-", new_rep_codes)
return new_rep_cleaned


def clean_wildtype(pdbname: str, pH: str):
pH_fl = float(pH)
pdb = PDBFixer(pdbname)
pdb.findMissingResidues()
pdb.findNonstandardResidues()
pdb.replaceNonstandardResidues()
pdb.removeHeterogens(False)
pdb.findMissingAtoms()
pdb.addMissingAtoms()
pdb.addMissingHydrogens(pH_fl)
PDBFile.writeFile(pdb.topology, pdb.positions, open("wildtype_fixed.pdb", 'w'))


def create_mutants(pdbname: str, new_rep_cleaned, chain: str, pH: str):
pH_fl = float(pH)
for mutant in new_rep_cleaned.splitlines():
mutpdb = PDBFixer(pdbname)
mutpdb.applyMutations([mutant], chain)
mutpdb.findMissingResidues()
mutpdb.findNonstandardResidues()
mutpdb.replaceNonstandardResidues()
mutpdb.removeHeterogens(False)
mutpdb.findMissingAtoms()
mutpdb.addMissingAtoms()
mutpdb.addMissingHydrogens(pH_fl)
PDBFile.writeFile(pdb.topology, pdb.positions, open(mutant + "_fixed.pdb", 'w'))


def main():
arguments = docopt(__doc__, version='mutant_maker.py')
new_rep_cleaned = get_data(arguments['--incsv'], arguments['--from-col'])
clean_wildtype(arguments['--inpdb'], arguments['--pH'])
create_mutants(arguments['--inpdb'], new_rep_cleaned, arguments['--chain'], arguments['--pH'])

if __name__ == '__main__':
main()
75 changes: 75 additions & 0 deletions bin/openmm-minimise.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
#!/usr/bin/env python3
"""openmm-minimise
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials
Usage:
openmm-minimise.py [--i=<pdb>] [--max-iter=<iter>] [--tol==<tol>]
Options:
--i=<pdb> Input the fixed PDB files from the Mutant Maker process
--max-iter=<iter> Maximum number of iterations to arrive at minimised energy
--tol=<tol> Tolerance level of energy change after which the protein's structure is considered minimised
"""
import logging
from docopt import docopt
from openmm.app import *
from openmm import *
from openmm.unit import *
import sys
from sys import stdout

def get_pdb(filename: str):
pdb = PDBFile(filename)
return pdb

def setup_forcefield():
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
return forcefield

def setup_system(pdb, forcefield):
system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff)
return system

def setup_simulation(pdb, system):
integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.001*picoseconds)
simulation = Simulation(pdb.topology, system, integrator)
simulation.context.setPositions(pdb.positions)
return simulation

def main():
arguments = docopt(__doc__, version='openmm-minimise.py')
pdb = get_pdb(arguments['--i'])
forcefield = setup_forcefield()
system = setup_system(pdb, forcefield, arguments['--no-restraints'])
str_name = arguments['--i']
stem = str_name.replace("_fixed.pdb","")
#csvunfolded = str(stem + "_unfolded.csv")
pdbunfolded = str(stem + "_unfolded.pdb")
pdbfolded = str(stem + "_folded.pdb")
simulation = setup_simulation(pdb, system)
init_state = simulation.context.getState(getEnergy=True, getPositions=True)
logging.info("Starting potential energy = %.9f kcal/mol"
% init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
simulation.minimizeEnergy(tolerance=0.01)
final_state = simulation.context.getState(getEnergy=True, getPositions=True)
logging.info("Minimised potential energy = %.9f kcal/mol"
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
PDBFile.writeFile(
simulation.topology, final_state.getPositions(), open(pdbunfolded, "w"))
pdbII = get_pdb(pdbunfolded)
forcefield = setup_forcefield()
systemII = setup_system(pdbII, forcefield)
simulationII = setup_simulation(pdbII, systemII)
init_state = simulationII.context.getState(getEnergy=True, getPositions=True)
logging.info("Starting potential energy = %.9f kcal/mol"
% init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
simulationII.minimizeEnergy(tolerance=0.00000001)
final_state = simulationII.context.getState(getEnergy=True, getPositions=True)
logging.info("Minimised potential energy = %.9f kcal/mol"
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole))
PDBFile.writeFile(
simulationII.topology, final_state.getPositions(), open(pdbfolded, "w"))
if __name__ == '__main__':
arguments = docopt(__doc__, version='openmm-minimise.py')
logging.basicConfig(stream=stem + ".out", level=logging.INFO)
main()
109 changes: 70 additions & 39 deletions bin/openmm-runner.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,77 @@
#!/usr/bin/env python3
"""openmm-md
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials
Usage:
openmm-md.py [--i=<pdb>] [--solv-mol=<mol>] [--temp==<temp>] [--pres=<pres>] [--nvt=<nvt>] [--npt=<npt] [--rep=<rep>]
Options:
--i=<pdb> Input the fixed PDB files from the Mutant Maker process
--solv-mol=<mol> Number of solvent molecules (default: solvation with H2O)
--temp=<temp> Temperature of MD thermostat
--pres=<pres> Pressure of MD barostat (NPT MD only)
--nvt=<nvt> Number of NVT trajectory steps to initiate MD (each step = 1fs)
--npt=<npt> Number of NPT steps in principal MD trajectory (each step = 1fs)
--rep=<rep> Reporting rate wherefrom PDB coordinates and therodynamic conditions are outputted to trajectory (PDB) and log (CSV) files respectively
"""
import logging
from docopt import docopt
from openmm.app import *
from openmm import *
from openmm.unit import *
import sys
from sys import stdout
from pdbfixer import PDBFixer

#pdb = PDBFile(sys.argv[1])
pdb = PDBFixer(sys.argv[1])
pdb.applyMutations(["SER-19-ALA"], "A")
pdb.findMissingResidues()
pdb.findNonstandardResidues()
pdb.replaceNonstandardResidues()
pdb.removeHeterogens(True)
pdb.findMissingAtoms()
pdb.addMissingAtoms()
pdb.addMissingHydrogens(7.0)
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
modeller = Modeller(pdb.topology, pdb.positions)
#modeller.deleteWater()
#residues=modeller.addHydrogens(forcefield)
solvmol=int(sys.argv[4])
modeller.addSolvent(forcefield, numAdded=solvmol)
system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
temp=int(sys.argv[2])
integrator = LangevinMiddleIntegrator(temp*kelvin, 1/picosecond, 0.001*picoseconds)
simulation = Simulation(modeller.topology, system, integrator)
simulation.context.setPositions(modeller.positions)
print("Minimizing energy")
simulation.minimizeEnergy()
print("Running NVT")
reprate=int(sys.argv[7])
simulation.reporters.append(PDBReporter(sys.argv[8], reprate))
simulation.reporters.append(StateDataReporter(stdout, reprate, step=True,
potentialEnergy=True, temperature=True, volume=True))
simulation.reporters.append(StateDataReporter(sys.argv[9], reprate, step=True,


def get_pdb(filename: str):
pdb = PDBFile(filename)
return pdb

def setup_forcefield():
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
return forcefield

def setup_modeller(pdb):
modeller = Modeller(pdb.topology, pdb.positions)
return modeller

def setup_system(modeller, forcefield, solvmol: str):
mol=int(solvmol)
modeller.addSolvent(forcefield, numAdded=mol)
system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds)
return system

def setup_simulation(modeller, system, temp: str):
temp_fl = int(temp)
integrator = LangevinMiddleIntegrator(temp_fl*kelvin, 1/picosecond, 0.001*picoseconds)
simulation = Simulation(modeller.topology, system, integrator)
simulation.context.setPositions(modeller.positions)
return simulation

def main():
arguments = docopt(__doc__, version='openmm-md.py')
pdb = get_pdb(arguments['--i'])
forcefield = setup_forcefield()
modeller = setup_modeller(pdb)
system = setup_system(modeller, forcefield, arguments['--solv-mol'])
simulation = setup_simulation(modeller, system, arguments['--temp'])
print("Minimizing energy")
simulation.minimizeEnergy()
print("Running NVT")
reprate = int(arguments['--rep'])
nvt = int(arguments['--nvt'])
npt = int(arguments['--npt'])
pres = int(arguments['--pres'])
temp = int(arguments['--temp'])
str_name = arguments['--i']
stem = str_name.replace("_opt.pdb", "")
simulation.reporters.append(PDBReporter(stem + "_MD.pdb", reprate))
simulation.reporters.append(StateDataReporter(stdout, reprate, step=True, potentialEnergy=True, temperature=True, volume=True))
simulation.reporters.append(StateDataReporter(stem + "_log.csv", reprate, step=True,
potentialEnergy=True, temperature=True, volume=True))
nvtsteps=int(sys.argv[5])
pres=int(sys.argv[3])
simulation.step(nvtsteps)
system.addForce(MonteCarloBarostat(pres*bar, temp*kelvin))
simulation.context.reinitialize(preserveState=True)
print("Running NPT")
nptsteps=int(sys.argv[6])
simulation.step(nptsteps)
simulation.step(nvt)
system.addForce(MonteCarloBarostat(pres*bar, temp*kelvin))
simulation.context.reinitialize(preserveState=True)
print("Running NPT")
simulation.step(npt)
if __name__ == '__main__':
main()
49 changes: 17 additions & 32 deletions main.nf
Original file line number Diff line number Diff line change
@@ -1,50 +1,35 @@
// enabling nextflow DSL v2
nextflow.enable.dsl=2

process openmm {

publishDir "${params.resultsDir}", mode: 'copy'

process pdbfixer-mutants {
input:
path incsv
path inpdb
val temp
val pres
val solvmol
val nptrun
val nvtrun
val reprate
output:
path 'traj.pdb'
path 'md_log.txt', emit: md_log

shell:
path '*_fixed.pdb', emit: fixed_pdbs
shell:
"""
openmm-runner.py $inpdb $temp $pres $solvmol $nptrun $nvtrun $reprate traj.pdb md_log.txt
mutant_maker.py --incsv $incsv --from-col ${params.csv.col} --in-pdb $inpdb ${params.mutant.maker.args}
"""

}

process hydro_plot {
process openmm-minimise {
input:
path md_log
val skipsteps

path fixed_pdbs
output:
path '*_unfolded.pdb', emit: unfolded_pdbs
path '*_folded.pdb', emit: folded_pdbs
shell:
"""
plot.py $md_log $skipsteps
"""
"""
openmm-minimise.py --i $fixed_pdbs
"""
}


workflow {
inpath_ch = channel.fromPath("${params.inputFile}")
temp = Channel.value(params.temp)
pres = Channel.value(params.pres)
solvmol = Channel.value(params.solvmol)
nvtrun = Channel.value(params.nvtrun)
nptrun = Channel.value(params.nptrun)
reprate = Channel.value(params.reprate)
skipsteps = Channel.value(params.skipsteps)
openmm(inpath_ch, temp, pres, solvmol, nvtrun, nptrun, reprate)
hydro_plot(openmm.out.md_log, skipsteps)
incsv_ch = channel.fromPath("${params.inputCsv}")
pdbfixer-mutants(inpath_ch, incsv_ch)
openmm-minimise(pdbfixer-mutants.out.fixed_pdbs)
}

4 changes: 3 additions & 1 deletion nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
params {
inputFile = "./testdata/2JIE.pdb"
resultsDir = "./results/"
}
inputCsv = "./testdata/tableExport-2JIEIII.csv"
csv.col = "Mutation"
mutant.maker.args = "--chain A --pH 7.5"

// include basic process configuration options
includeConfig 'conf/base.config'

0 comments on commit bc9db61

Please sign in to comment.