Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

first draft of prep complex workflow #1

Open
wants to merge 18 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,20 @@
# gmx_fileprep
Scripts for local gromacs input file preparation
This repo contains scripts for local gromacs input file preparation, tailored for MobleyLab/SeparatedTopologies:
* solvation: see [prep_complex.py](prep_complex.py)
* gmx options:
* `gen-pairs = no`
* `fudgeLJ = 0.5`

* openFF small molecule forcefield swapping
* solvent swap ex: see [swap_solvent_ligand_ff.py](swap_solvent_ligand_ff.py)
* complex swap ex: see [swap_complex_ligand_ff.py](swap_complex_ligand_ff.py)

## Requires:
Please use gmxfileprep.yaml as your environment. Your environment should use parmed v4 or newer. This version of parmed adds atomtyping checks so atomtypes are not combined haphazardly. This ***should*** prevent atomtyping issues...

```
conda env create -f gmxfileprep.yaml
```

## Warnings
This program uses `parmed`, if you run into strange gromacs errors when running, the first thing I'd suggest to check is that all your atomtypes in the .top file are correct
211 changes: 211 additions & 0 deletions attic/swap_ligand.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
# swap_ligand_params.py
# * this is an EXAMPLE main file using the wrapper functions
# * workflow creates ligand and complex .gro and .top files
# * complex files are ready for energy minimization
# * ligand files need to be solvated and have ions added
# the water and ion FF parameters have already been added
# * the default parameters in the gromacs .top file are set
# for SepTop (gen-pairs: no, fudgeLJ: 0.5)

import parmed_wrapper as pmdw
import io_wrapper as iow
import openff_wrapper as offw
import tempfile
import time
from pathlib import Path
import parmed
import os
from rdkit import Chem
from rdkit.Chem import AllChem

def main():
start_time = time.time()

###################
## COMPLEX SETUP ##
###################

# print("\nComplex Setup\n" + "="*13)
# print("Step 1: Loading parameterized Parmed Structure for Complex + Solvent + Ions")
# pmd_receptor_struct = parmed.load_file(complex_top, xyz=complex_gro)

# print("Step 2: Creating a ligand .mol2 file using Complex LIG positions")
# complex_lig_struct = pmd_receptor_struct[pmdw.create_mask_str([complex_ligcode])]
# with tempfile.NamedTemporaryFile(suffix=".mol2") as complex_mol2:
# complex_lig_struct.save(complex_mol2.name, overwrite=True)
# pmdw.edit_mol2_positions(ligand_mol2, complex_mol2.name, ligand_mol2_complex)

# print("Step 3: Parameterizing Ligand with OpenFF")
# openff_ff = offw.setup_ff(ligand_ff)
# lig_mol = offw.create_molecule(ligand_mol2_complex, complex_ligcode)
# lig_positions = lig_mol.conformers[0]
# lig_topology = lig_mol.to_topology()
# lig_system = openff_ff.create_openmm_system(lig_topology)

# print("Step 4: Creating Parmed Structure for Ligand")
# pmd_lig_struct = pmdw.create_pmd_ligand(lig_topology, lig_system, lig_positions)

# print("Step 5: Creating Parmed Structure for Solvated Complex: Protein + Ligand + Solvent + Ions")
# exclude_resids = [complex_ligcode, 'Na+', 'Cl-', 'HOH']
# unq_lig_struct = pmdw.unique_atom_types(pmd_lig_struct, lig_mol.name)
# unq_lig_struct = pmdw.fix_gen_pairs(unq_lig_struct)
# unq_lig_struct.save("/Users/megosato/Desktop/test.top", overwrite=True)

# pmd_receptor_struct.save("/Users/megosato/Desktop/test2.top", overwrite=True)

# mask = pmdw.create_mask_from_exclusion(pmd_receptor_struct, exclude_resids)
# pmd_complex_struct = pmd_receptor_struct[mask] \
# + unq_lig_struct \
# + pmd_receptor_struct[':Na+:Cl-:HOH']

# # For some reason... the parmed complex needs to be saved out before running
# # unique_atom_types() in order to avoid the scaling 1-4 error
# tmp_top = tempfile.NamedTemporaryFile(suffix='.top', delete=False)
# tmp_gro = tempfile.NamedTemporaryFile(suffix='.gro', delete=False)
# pmd_complex_struct.save(tmp_top.name, overwrite=True)
# pmd_complex_struct.save(tmp_gro.name, overwrite=True)
# pmd_complex_struct = parmed.load_file(tmp_top.name, xyz=tmp_gro.name)
# os.unlink(tmp_gro.name)
# os.unlink(tmp_top.name)

# print("Step 6: Ensuring All Atomtypes are Unique")
# unq_complex_struct = pmdw.unique_atom_types(pmd_complex_struct, lig_mol.name)
# final_complex_struct = pmdw.fix_gen_pairs(unq_complex_struct)

# print("Step 7: Saving out Complex .gro and .top and .pdb")
# final_complex_struct.save(complex_gro_out, overwrite=True)
# final_complex_struct.save(complex_top_out, overwrite=True)
# final_complex_struct.save(complex_pdb_out, overwrite=True)

# # Add a sanity check that confirms gro files are the same before and after addition
# # of differently parameterized ligand
# print("Step 8: Sanity check:")
# print(" .gro: modified file should columns be a subset of the original (subset or !subset)")
# print(" .top: modified file should be different from original")
# print(f" filetype original modified difference")
# print(f" ======== ======== ======== ==========")
# print(f" .gro {iow.count_lines(complex_gro):>8} {iow.count_lines(complex_gro_out):>8} {'subset' if iow.is_a_subset(complex_gro_out, complex_gro) else '!subset':>10}")
# print(f" .top {iow.count_lines(complex_top):>8} {iow.count_lines(complex_top_out):>8} {iow.diff(complex_top, complex_top_out):>10}")



###################
## SOLVENT SETUP ##
###################
print("\n\nSolvent Setup\n" + "="*13)

print("Step 1: Loading parameterized Parmed Structure for Ligand + Solvent + Ions")
pmd_solvent_struct = parmed.load_file(solvent_top, xyz=solvent_gro)



print("Step 2: Creating a ligand .mol2 file using Solvent UNL positions")

rdmol = None
solvent_lig_struct = pmd_solvent_struct[pmdw.create_mask_str([solvent_ligcode])]
with tempfile.NamedTemporaryFile(suffix=".pdb") as solvent_pdb:
solvent_lig_struct.save(solvent_pdb.name, overwrite=True)
#pmdw.edit_mol2_positions(ligand_mol2, solvent_mol2.name, ligand_mol2_solvent)

mol2 = Chem.MolFromMol2File(ligand_mol2)
mol2_smiles = Chem.MolToSmiles(mol2)
template = Chem.MolFromSmiles(mol2_smiles)
docked_pose = AllChem.MolFromPDBFile(solvent_pdb.name)

#Assign the bond order to force correct valence
rdmol = AllChem.AssignBondOrdersFromTemplate(template, docked_pose)

print("Step 3: Parameterizing Ligand with OpenFF")
openff_ff = offw.setup_ff(ligand_ff)
lig_mol = offw.create_molecule(rdmol, solvent_ligcode)
lig_positions = lig_mol.conformers[0]
lig_topology = lig_mol.to_topology()
lig_system = openff_ff.create_openmm_system(lig_topology)

print("Step 4: Creating Parmed Structure for Ligand")
pmd_lig_struct = pmdw.create_pmd_ligand(lig_topology, lig_system, lig_positions)
#pmd_lig_struct.save("/Users/megosato/Desktop/lig_after_ff.top")

print("Step 5: Creating Parmed Structure for Solvent: Ligand + Solvent + Ions")

pmd_solvent_struct = pmd_solvent_struct[':Cl-:Na+'] \
+ pmd_lig_struct \
+ pmd_solvent_struct[':HOH']

unq_solvent_struct = pmdw.unique_atom_types(pmd_solvent_struct, lig_mol.name)
#unq_solvent_struct.save("/Users/megosato/Desktop/solvent_after_combo.top")

print("Step 6: Ensuring Ligand Atomtypes are Unique")
#unq_solvent_struct = pmdw.unique_atom_types(pmd_solvent_struct, lig_mol.name)
final_solvent_struct = pmdw.fix_gen_pairs(unq_solvent_struct)

print("Step 7: Saving out Solvent .gro and .top")
final_solvent_struct.save(solvent_gro_out, overwrite=True)
final_solvent_struct.save(solvent_top_out, overwrite=True)


# Add a sanity check that confirms gro files are the same before and after addition
# of differently parameterized ligand
print("Step 9: Sanity check:")
print(" .gro: modified file columns should be a subset of the original (subset or !subset)")
print(" .top: modified file should be different from original")
print(f" filetype original modified difference")
print(f" ======== ======== ======== ==========")
print(f" .gro {iow.count_lines(solvent_gro):>8} {iow.count_lines(solvent_gro_out):>8} {'subset' if iow.is_a_subset(solvent_gro_out, solvent_gro) else '!subset':>10}")
print(f" .top {iow.count_lines(solvent_top):>8} {iow.count_lines(solvent_top_out):>8} {iow.diff(solvent_top, solvent_top_out):>10}")


# end_time = time.time()
# print(f"Total Time Elapsed: {(end_time - start_time)/60:.2} min")

if __name__ == "__main__":

desktop_path = Path("/Users/megosato/Desktop")
si_path = desktop_path / "SI/BACE1/input"
output_path = desktop_path / "lig11_bace_prep"

ligand_ff = desktop_path / "bace_prep/sage-2.1.0rc.offxml"

complex_ligcode = "LIG"
solvent_ligcode = "UNL"

for ligfam in ["amide_series", "spirocycles", "biphenyl"]:
ligfam_dir = si_path / ligfam
mol2_dir = ligfam_dir / "mol2"
lig_names = []
mol2_files = Path(mol2_dir).glob('*')
for f in mol2_files:
lig_names.append(str(f.stem))

fam_output_dir = output_path / ligfam
fam_output_dir.mkdir(parents=True, exist_ok=True)

for lig in lig_names:

print(ligfam, lig)

if lig not in ["lig_41", "lig_67"]:
continue

lig_si_dir = ligfam_dir / lig
complex_gro = str(lig_si_dir / "complex.gro")
complex_top = str(lig_si_dir / "complex.top")
solvent_gro = str(lig_si_dir / "solvent.gro")
solvent_top = str(lig_si_dir / "solvent.top")
ligand_mol2 = str(mol2_dir / f"{lig}.mol2")

if ligfam == "biphenyl":
lig = "bJ_" + lig.split("_")[-1]

lig_output_dir = fam_output_dir / lig
lig_output_dir.mkdir(parents=True, exist_ok=True)

ligand_mol2_complex = str(lig_output_dir / f"{lig}_complex.mol2")
ligand_mol2_solvent = str(lig_output_dir / f"{lig}_solvent.mol2")
complex_gro_out = str(lig_output_dir / "complex.gro")
complex_top_out = str(lig_output_dir / "complex.top")
complex_pdb_out = str(lig_output_dir / "complex.pdb")
solvent_gro_out = str(lig_output_dir / "solvent.gro")
solvent_top_out = str(lig_output_dir / "solvent.top")

main()
Loading