-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
229 additions
and
82 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,70 @@ | ||
#!/usr/bin/env python3 | ||
"""mutant_maker | ||
Fix PDB files, adding missing residues, atoms and hydrogens, then create mutations in desired locations | ||
Usage: | ||
mutant_maker.py [--incsv=<incsv>] [--from-col=<col>] [--in-pdb=<pdb>] [--chain=<chain>] [--pH=<pH>] | ||
Options: | ||
--incsv=<incsv> Read data from csv file | ||
--from-col=<col> Select column title containing mutations | ||
--in-pdb=<pdb> PDB file of protein wildtype | ||
--chain=<chain> Select chain upon which to perform mutation | ||
--pH=<ph> Set pH of the protein | ||
""" | ||
|
||
from docopt import docopt | ||
import pandas as pd | ||
import re | ||
from openmm.app import PDBFile | ||
import sys | ||
from sys import stdout | ||
from pdbfixer import PDBFixer | ||
|
||
pdb = PDBFixer(sys.argv[1]) | ||
pdb.addMutations(mutationChain='A', mutationResidues='19', newResidues='ALA') | ||
pdb.findMissingResidues() | ||
pdb.findNonstandardResidues() | ||
pdb.replaceNonstandardResidues() | ||
pdb.removeHeterogens(True) | ||
pdb.findMissingAtoms() | ||
pdb.addMissingAtoms() | ||
pdb.addMissingHydrogens(7.0) | ||
PDBFile.writeFile(pdb.topology, pdb.positions, open('modified_protein.pdb', 'w')) | ||
def get_data(csvname: str, col: str): | ||
data = pd.read_csv(csvname) | ||
clipped_data=pd.DataFrame(data=data, columns=[col]) | ||
divided_data = clipped_data.Mutation.str.extract(r'([a-zA-Z]+)([^a-zA-Z]+)([a-zA-Z]+)') | ||
divided_cleaned = divided_data.to_string(header=None, index=False) | ||
amino_index = {'G': 'GLY' , 'L': 'LEU', 'R': 'ARG', 'A': 'ALA', 'Y': 'TYR', 'S': 'SER', 'M': 'MET', 'I': 'ILE', 'T': 'THR', 'C': 'CYS', 'V': 'VAL', 'P': 'PRO', 'F': 'PHE', 'W': 'TRP', 'H': 'HIS', 'K': 'LYS', 'D': 'ASP', 'E': 'GLU', 'N': 'ASN', 'Q': 'GLN', ' ': '-'} | ||
new_rep_codes = re.sub(r"[GLRAYSMITCVPFWHKDENQ ]", lambda x: amino_index[x.group(0)], divided_cleaned) | ||
new_rep_cleaned = re.sub("--","-", new_rep_codes) | ||
return new_rep_cleaned | ||
|
||
|
||
def clean_wildtype(pdbname: str, pH: str): | ||
pH_fl = float(pH) | ||
pdb = PDBFixer(pdbname) | ||
pdb.findMissingResidues() | ||
pdb.findNonstandardResidues() | ||
pdb.replaceNonstandardResidues() | ||
pdb.removeHeterogens(False) | ||
pdb.findMissingAtoms() | ||
pdb.addMissingAtoms() | ||
pdb.addMissingHydrogens(pH_fl) | ||
PDBFile.writeFile(pdb.topology, pdb.positions, open("wildtype_fixed.pdb", 'w')) | ||
|
||
|
||
def create_mutants(pdbname: str, new_rep_cleaned, chain: str, pH: str): | ||
pH_fl = float(pH) | ||
for mutant in new_rep_cleaned.splitlines(): | ||
mutpdb = PDBFixer(pdbname) | ||
mutpdb.applyMutations([mutant], chain) | ||
mutpdb.findMissingResidues() | ||
mutpdb.findNonstandardResidues() | ||
mutpdb.replaceNonstandardResidues() | ||
mutpdb.removeHeterogens(False) | ||
mutpdb.findMissingAtoms() | ||
mutpdb.addMissingAtoms() | ||
mutpdb.addMissingHydrogens(pH_fl) | ||
PDBFile.writeFile(pdb.topology, pdb.positions, open(mutant + "_fixed.pdb", 'w')) | ||
|
||
|
||
def main(): | ||
arguments = docopt(__doc__, version='mutant_maker.py') | ||
new_rep_cleaned = get_data(arguments['--incsv'], arguments['--from-col']) | ||
clean_wildtype(arguments['--inpdb'], arguments['--pH']) | ||
create_mutants(arguments['--inpdb'], new_rep_cleaned, arguments['--chain'], arguments['--pH']) | ||
|
||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
#!/usr/bin/env python3 | ||
"""openmm-minimise | ||
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials | ||
Usage: | ||
openmm-minimise.py [--i=<pdb>] [--max-iter=<iter>] [--tol==<tol>] | ||
Options: | ||
--i=<pdb> Input the fixed PDB files from the Mutant Maker process | ||
--max-iter=<iter> Maximum number of iterations to arrive at minimised energy | ||
--tol=<tol> Tolerance level of energy change after which the protein's structure is considered minimised | ||
""" | ||
import logging | ||
from docopt import docopt | ||
from openmm.app import * | ||
from openmm import * | ||
from openmm.unit import * | ||
import sys | ||
from sys import stdout | ||
|
||
def get_pdb(filename: str): | ||
pdb = PDBFile(filename) | ||
return pdb | ||
|
||
def setup_forcefield(): | ||
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml') | ||
return forcefield | ||
|
||
def setup_system(pdb, forcefield): | ||
system = forcefield.createSystem(pdb.topology, nonbondedMethod=NoCutoff) | ||
return system | ||
|
||
def setup_simulation(pdb, system): | ||
integrator = LangevinMiddleIntegrator(300*kelvin, 1/picosecond, 0.001*picoseconds) | ||
simulation = Simulation(pdb.topology, system, integrator) | ||
simulation.context.setPositions(pdb.positions) | ||
return simulation | ||
|
||
def main(): | ||
arguments = docopt(__doc__, version='openmm-minimise.py') | ||
pdb = get_pdb(arguments['--i']) | ||
forcefield = setup_forcefield() | ||
system = setup_system(pdb, forcefield, arguments['--no-restraints']) | ||
str_name = arguments['--i'] | ||
stem = str_name.replace("_fixed.pdb","") | ||
#csvunfolded = str(stem + "_unfolded.csv") | ||
pdbunfolded = str(stem + "_unfolded.pdb") | ||
pdbfolded = str(stem + "_folded.pdb") | ||
simulation = setup_simulation(pdb, system) | ||
init_state = simulation.context.getState(getEnergy=True, getPositions=True) | ||
logging.info("Starting potential energy = %.9f kcal/mol" | ||
% init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)) | ||
simulation.minimizeEnergy(tolerance=0.01) | ||
final_state = simulation.context.getState(getEnergy=True, getPositions=True) | ||
logging.info("Minimised potential energy = %.9f kcal/mol" | ||
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)) | ||
PDBFile.writeFile( | ||
simulation.topology, final_state.getPositions(), open(pdbunfolded, "w")) | ||
pdbII = get_pdb(pdbunfolded) | ||
forcefield = setup_forcefield() | ||
systemII = setup_system(pdbII, forcefield) | ||
simulationII = setup_simulation(pdbII, systemII) | ||
init_state = simulationII.context.getState(getEnergy=True, getPositions=True) | ||
logging.info("Starting potential energy = %.9f kcal/mol" | ||
% init_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)) | ||
simulationII.minimizeEnergy(tolerance=0.00000001) | ||
final_state = simulationII.context.getState(getEnergy=True, getPositions=True) | ||
logging.info("Minimised potential energy = %.9f kcal/mol" | ||
% final_state.getPotentialEnergy().value_in_unit(kilocalories_per_mole)) | ||
PDBFile.writeFile( | ||
simulationII.topology, final_state.getPositions(), open(pdbfolded, "w")) | ||
if __name__ == '__main__': | ||
arguments = docopt(__doc__, version='openmm-minimise.py') | ||
logging.basicConfig(stream=stem + ".out", level=logging.INFO) | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,77 @@ | ||
#!/usr/bin/env python3 | ||
"""openmm-md | ||
Minimise the potential energy of the wildtype protein structure and mutated variants, according to AMBER14 potentials | ||
Usage: | ||
openmm-md.py [--i=<pdb>] [--solv-mol=<mol>] [--temp==<temp>] [--pres=<pres>] [--nvt=<nvt>] [--npt=<npt] [--rep=<rep>] | ||
Options: | ||
--i=<pdb> Input the fixed PDB files from the Mutant Maker process | ||
--solv-mol=<mol> Number of solvent molecules (default: solvation with H2O) | ||
--temp=<temp> Temperature of MD thermostat | ||
--pres=<pres> Pressure of MD barostat (NPT MD only) | ||
--nvt=<nvt> Number of NVT trajectory steps to initiate MD (each step = 1fs) | ||
--npt=<npt> Number of NPT steps in principal MD trajectory (each step = 1fs) | ||
--rep=<rep> Reporting rate wherefrom PDB coordinates and therodynamic conditions are outputted to trajectory (PDB) and log (CSV) files respectively | ||
""" | ||
import logging | ||
from docopt import docopt | ||
from openmm.app import * | ||
from openmm import * | ||
from openmm.unit import * | ||
import sys | ||
from sys import stdout | ||
from pdbfixer import PDBFixer | ||
|
||
#pdb = PDBFile(sys.argv[1]) | ||
pdb = PDBFixer(sys.argv[1]) | ||
pdb.applyMutations(["SER-19-ALA"], "A") | ||
pdb.findMissingResidues() | ||
pdb.findNonstandardResidues() | ||
pdb.replaceNonstandardResidues() | ||
pdb.removeHeterogens(True) | ||
pdb.findMissingAtoms() | ||
pdb.addMissingAtoms() | ||
pdb.addMissingHydrogens(7.0) | ||
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml') | ||
modeller = Modeller(pdb.topology, pdb.positions) | ||
#modeller.deleteWater() | ||
#residues=modeller.addHydrogens(forcefield) | ||
solvmol=int(sys.argv[4]) | ||
modeller.addSolvent(forcefield, numAdded=solvmol) | ||
system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds) | ||
temp=int(sys.argv[2]) | ||
integrator = LangevinMiddleIntegrator(temp*kelvin, 1/picosecond, 0.001*picoseconds) | ||
simulation = Simulation(modeller.topology, system, integrator) | ||
simulation.context.setPositions(modeller.positions) | ||
print("Minimizing energy") | ||
simulation.minimizeEnergy() | ||
print("Running NVT") | ||
reprate=int(sys.argv[7]) | ||
simulation.reporters.append(PDBReporter(sys.argv[8], reprate)) | ||
simulation.reporters.append(StateDataReporter(stdout, reprate, step=True, | ||
potentialEnergy=True, temperature=True, volume=True)) | ||
simulation.reporters.append(StateDataReporter(sys.argv[9], reprate, step=True, | ||
|
||
|
||
def get_pdb(filename: str): | ||
pdb = PDBFile(filename) | ||
return pdb | ||
|
||
def setup_forcefield(): | ||
forcefield = ForceField('amber14-all.xml', 'amber14/tip3pfb.xml') | ||
return forcefield | ||
|
||
def setup_modeller(pdb): | ||
modeller = Modeller(pdb.topology, pdb.positions) | ||
return modeller | ||
|
||
def setup_system(modeller, forcefield, solvmol: str): | ||
mol=int(solvmol) | ||
modeller.addSolvent(forcefield, numAdded=mol) | ||
system = forcefield.createSystem(modeller.topology, nonbondedMethod=PME, nonbondedCutoff=1.0*nanometer, constraints=HBonds) | ||
return system | ||
|
||
def setup_simulation(modeller, system, temp: str): | ||
temp_fl = int(temp) | ||
integrator = LangevinMiddleIntegrator(temp_fl*kelvin, 1/picosecond, 0.001*picoseconds) | ||
simulation = Simulation(modeller.topology, system, integrator) | ||
simulation.context.setPositions(modeller.positions) | ||
return simulation | ||
|
||
def main(): | ||
arguments = docopt(__doc__, version='openmm-md.py') | ||
pdb = get_pdb(arguments['--i']) | ||
forcefield = setup_forcefield() | ||
modeller = setup_modeller(pdb) | ||
system = setup_system(modeller, forcefield, arguments['--solv-mol']) | ||
simulation = setup_simulation(modeller, system, arguments['--temp']) | ||
print("Minimizing energy") | ||
simulation.minimizeEnergy() | ||
print("Running NVT") | ||
reprate = int(arguments['--rep']) | ||
nvt = int(arguments['--nvt']) | ||
npt = int(arguments['--npt']) | ||
pres = int(arguments['--pres']) | ||
temp = int(arguments['--temp']) | ||
str_name = arguments['--i'] | ||
stem = str_name.replace("_opt.pdb", "") | ||
simulation.reporters.append(PDBReporter(stem + "_MD.pdb", reprate)) | ||
simulation.reporters.append(StateDataReporter(stdout, reprate, step=True, potentialEnergy=True, temperature=True, volume=True)) | ||
simulation.reporters.append(StateDataReporter(stem + "_log.csv", reprate, step=True, | ||
potentialEnergy=True, temperature=True, volume=True)) | ||
nvtsteps=int(sys.argv[5]) | ||
pres=int(sys.argv[3]) | ||
simulation.step(nvtsteps) | ||
system.addForce(MonteCarloBarostat(pres*bar, temp*kelvin)) | ||
simulation.context.reinitialize(preserveState=True) | ||
print("Running NPT") | ||
nptsteps=int(sys.argv[6]) | ||
simulation.step(nptsteps) | ||
simulation.step(nvt) | ||
system.addForce(MonteCarloBarostat(pres*bar, temp*kelvin)) | ||
simulation.context.reinitialize(preserveState=True) | ||
print("Running NPT") | ||
simulation.step(npt) | ||
if __name__ == '__main__': | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,35 @@ | ||
// enabling nextflow DSL v2 | ||
nextflow.enable.dsl=2 | ||
|
||
process openmm { | ||
|
||
publishDir "${params.resultsDir}", mode: 'copy' | ||
|
||
process pdbfixer-mutants { | ||
input: | ||
path incsv | ||
path inpdb | ||
val temp | ||
val pres | ||
val solvmol | ||
val nptrun | ||
val nvtrun | ||
val reprate | ||
output: | ||
path 'traj.pdb' | ||
path 'md_log.txt', emit: md_log | ||
|
||
shell: | ||
path '*_fixed.pdb', emit: fixed_pdbs | ||
shell: | ||
""" | ||
openmm-runner.py $inpdb $temp $pres $solvmol $nptrun $nvtrun $reprate traj.pdb md_log.txt | ||
mutant_maker.py --incsv $incsv --from-col ${params.csv.col} --in-pdb $inpdb ${params.mutant.maker.args} | ||
""" | ||
|
||
} | ||
|
||
process hydro_plot { | ||
process openmm-minimise { | ||
input: | ||
path md_log | ||
val skipsteps | ||
|
||
path fixed_pdbs | ||
output: | ||
path '*_unfolded.pdb', emit: unfolded_pdbs | ||
path '*_folded.pdb', emit: folded_pdbs | ||
shell: | ||
""" | ||
plot.py $md_log $skipsteps | ||
""" | ||
""" | ||
openmm-minimise.py --i $fixed_pdbs | ||
""" | ||
} | ||
|
||
|
||
workflow { | ||
inpath_ch = channel.fromPath("${params.inputFile}") | ||
temp = Channel.value(params.temp) | ||
pres = Channel.value(params.pres) | ||
solvmol = Channel.value(params.solvmol) | ||
nvtrun = Channel.value(params.nvtrun) | ||
nptrun = Channel.value(params.nptrun) | ||
reprate = Channel.value(params.reprate) | ||
skipsteps = Channel.value(params.skipsteps) | ||
openmm(inpath_ch, temp, pres, solvmol, nvtrun, nptrun, reprate) | ||
hydro_plot(openmm.out.md_log, skipsteps) | ||
incsv_ch = channel.fromPath("${params.inputCsv}") | ||
pdbfixer-mutants(inpath_ch, incsv_ch) | ||
openmm-minimise(pdbfixer-mutants.out.fixed_pdbs) | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters