#!/usr/bin/env python # coding: utf-8 # # *This notebook contains material from [PyRosetta](https://RosettaCommons.github.io/PyRosetta.notebooks); # content is available [on Github](https://github.com/RosettaCommons/PyRosetta.notebooks.git).* # # < [Docking Moves in Rosetta](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/07.02-Docking-Moves-in-Rosetta.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Global Ligand Docking using `XMLObjects` Using the `ref2015.wts` Scorefunction](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/08.01-Ligand-Docking-XMLObjects.ipynb) >

# # Ligand Refinement in PyRosetta (a.k.a. High-Resolution Local Docking) Using the `ligand.wts` Scorefunction # *Warning*: This notebook uses `pyrosetta.distributed.viewer` code, which runs in `jupyter notebook` and might not run if you're using `jupyterlab`. # In[1]: get_ipython().system('pip install pyrosettacolabsetup') import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta() import pyrosetta; pyrosetta.init() import logging logging.basicConfig(level=logging.INFO) import os import pyrosetta import pyrosetta.distributed import pyrosetta.distributed.viewer as viewer # Initialize PyRosetta and setup the input pose: # In[2]: params_file = "inputs/TPA.gasteiger.fa.params" flags = f""" -extra_res_fa {params_file} # Provide a custom TPA .params file -ignore_unrecognized_res 1 -mute all """ pyrosetta.distributed.init(flags) pose = pyrosetta.io.pose_from_file("inputs/test_lig.pdb") # Before we perform ligand refinement, let's take a look at the input `.pdb` file using the `pyrosetta.distributed.viewer` macromolecular visualizer: # In[8]: chE = pyrosetta.rosetta.core.select.residue_selector.ChainSelector("E") view = viewer.init(pose) view.add(viewer.setStyle()) view.add(viewer.setStyle(command=({"hetflag": True}, {"stick": {"colorscheme": "brownCarbon", "radius": 0.2}}))) view.add(viewer.setSurface(residue_selector=chE, opacity=0.7, color='white')) view.add(viewer.setHydrogenBonds()) view() # *** # *Restart Jupyter Notebook kernel to properly re-initialize PyRosetta* # *** # In[1]: get_ipython().system('pip install pyrosettacolabsetup') import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta() import pyrosetta; pyrosetta.init() import logging logging.basicConfig(level=logging.INFO) import os import pyrosetta import pyrosetta.distributed import pyrosetta.distributed.viewer as viewer # The following ligand refinement example was adapted from `~Rosetta/main/source/src/python/PyRosetta/src/demo/D120_Ligand_interface.py`: # In[9]: def sample_ligand_interface(pdb_filename, partners, ligand_params=[""], jobs=1, job_output="ligand_output"): """ Performs ligand-protein docking using Rosetta fullatom docking (DockingHighRes) on the ligand-protein complex in using the relative chain . If the ligand parameters (a .params file) are not defaultly loaded into PyRosetta, must supply the list of files including the ligand parameters. trajectories are performed with output structures named _(job#).pdb. Note: Global docking, a problem solved by the Rosetta DockingProtocol, requires interface detection and refinement as with other protocols, these tasks are split into centroid (interface detection) and high-resolution (interface refinement) methods without a centroid representation, low-resolution ligand-protein prediction is not possible and as such, only the high-resolution ligand-protein interface refinement is available. If you add a perturbation or randomization step, the high-resolution stages may fail. A perturbation step CAN make this a global docking algorithm however the rigid-body sampling preceding refinement requires extensive sampling to produce accurate results and this algorithm spends most of its effort in refinement (which may be useless for the predicted interface). This script performs ligand-protein interface structure prediction but does NOT perform global ligand-protein docking. Since there is no generic interface detection, the input PDB file must have the ligand placed near the interface that will be refined. If the DockMCMProtocol is applied to a pose without placement near the interface, then the refinement may: -waste steps sampling the wrong interface -fail by predicting an incorrect interface very far from the true interface -fail by separating the ligand from the protein (usually due to a clash) DockMCMProtocol does not require an independent randomization or perturbation step to "seed" its prediction. Additional refinement steps may increase the accuracy of the predicted conformation (see refinement.py). Drastic moves (large conformational changes) should be avoided; if they precede the protocol, the problems above may occur, if they succeed the protocol, the protocol results may be lost. """ # Declare working directory and output directory working_dir = os.getcwd() output_dir = "outputs" if not os.path.exists(output_dir): os.mkdir(output_dir) # Initialize PyRosetta pyrosetta.init() # Create an empty pose from the desired PDB file pose = pyrosetta.rosetta.core.pose.Pose() # If the params list has contents, load .params files # Note: this method of adding ligands to the ResidueTypeSet is unnecessary # if you call pyrosetta.init("-extra_res_fa {}".format(ligand_params)) if len(ligand_params) != 0 and ligand_params[0] != "": ligand_params = pyrosetta.Vector1(ligand_params) res_set = pose.conformation().modifiable_residue_type_set_for_conf() res_set.read_files_for_base_residue_types(ligand_params) pose.conformation().reset_residue_type_set_for_conf(res_set) # Load pdb_filename into pose pyrosetta.io.pose_from_file(pose, pdb_filename) # Setup the docking FoldTree # the method setup_foldtree takes an input pose and sets its # FoldTree to have jump 1 represent the relation between the two docking # partners, the jump points are the residues closest to the centers of # geometry for each partner with a cutpoint at the end of the chain, # the second argument is a string specifying the relative chain orientation # such as "A_B" of "LH_A", ONLY TWO BODY DOCKING is supported and the # partners MUST have different chain IDs and be in the same pose (the # same PDB), additional chains can be grouped with one of the partners, # the "_" character specifies which bodies are separated # the third argument...is currently unsupported but must be set (it is # supposed to specify which jumps are movable, to support multibody # docking...but Rosetta doesn't currently) # the FoldTrees setup by this method are for TWO BODY docking ONLY! dock_jump = 1 # jump number 1 is the inter-body jump pyrosetta.rosetta.protocols.docking.setup_foldtree(pose, partners, pyrosetta.Vector1([dock_jump])) # Create ScoreFunctions for centroid and fullatom docking scorefxn = pyrosetta.create_score_function("ligand.wts") # Setup the high resolution (fullatom) docking protocol using DockMCMProtocol. docking = pyrosetta.rosetta.protocols.docking.DockMCMProtocol() # Many of its options and settings can be set using the setter methods. docking.set_scorefxn(scorefxn) # Change directory temporarily for output os.chdir(output_dir) # Setup the PyJobDistributor jd = pyrosetta.toolbox.py_jobdistributor.PyJobDistributor(job_output, jobs, scorefxn, compress=False) # Set the native pose so that the output scorefile contains the pose rmsd metric jd.native_pose = pose # Optional: setup a PyMOLObserver # pyrosetta.rosetta.protocols.moves.AddPyMOLObserver(test_pose, True) # Perform protein-ligand docking # counter = 0 # for pretty output to PyMOLObserver while not jd.job_complete: test_pose = pose.clone() # Reset test pose to original structure # counter += 1 # Change the pose name, for pretty output to PyMOLObserver # test_pose.pdb_info().name(job_output + '_' + str(counter)) # Perform docking and output to PyMOL: docking.apply(test_pose) # Write the decoy structure to disk: jd.output_decoy(test_pose) os.chdir(working_dir) # Let's test out the `sample_ligand_interface` function (takes ~2 minutes with `jobs=1`, which means nstruct is set to 1 in the `PyJobDistributor`): # In[10]: if not os.getenv("DEBUG"): sample_ligand_interface("inputs/test_lig.pdb", "E_X", ligand_params=["inputs/TPA.gasteiger.fa.params"], jobs=1, job_output="test_lig") # *Interpreting Results:* # # The `PyJobDistributor` will output the lowest scoring pose for each trajectory # (as a `.pdb` file), recording the score in `outputs/.fasc`. Generally, # the decoy generated with the lowest score contains the best prediction # for the protein-ligand conformation. PDB files produced from docking will contain # both docking partners in their predicted conformation. When inspecting these # PDB files (or the `PyMOLObserver` output) be aware that PyMOL can introduce or # predict bonds that do not exist, particularly for close atoms. This rarely # occurs when using the PyMOLMover.keep_history feature (since PyRosetta will # sample some conformation space that has clashes). # # The `PyMOLObserver` will output a series of structures directly produced by the # DockingProtocol. Unfortunately, this may include intermediate structures that # do not yield any insight into the protocol performance. A LARGE number of # structures are output to PyMOL and your machine may have difficulty # loading all of these structures. If this occurs, try changing the # `PyMOLObserver` keep_history to False or running the protocol without the # `PyMOLObserver`. # # Interface structure prediction is useful for considering what physical # properties are important in the binding event and what conformational changes # occur. Once experienced using PyRosetta, you can easily write scripts to # investigate the Rosetta score terms and structural characteristics. There is no # general interpretation of ligand-binding results. Although Rosetta score does # not translate directly to physical meaning (it is not physical energy), # splitting the docked partners and comparing the scores (after packing or # refinement) can indicate the strength of the bonding interaction. # *** # *Restart Jupyter Notebook kernel to properly re-initialize PyRosetta* # *** # In[1]: get_ipython().system('pip install pyrosettacolabsetup') import pyrosettacolabsetup; pyrosettacolabsetup.install_pyrosetta() import pyrosetta; pyrosetta.init() import logging logging.basicConfig(level=logging.INFO) import os, sys import pyrosetta import pyrosetta.distributed import pyrosetta.distributed.viewer as viewer # In[2]: params_file = "inputs/TPA.gasteiger.fa.params" flags = f""" -extra_res_fa {params_file} -ignore_unrecognized_res 1 -mute all """ pyrosetta.distributed.init(flags) pose = pyrosetta.io.pose_from_file("expected_outputs/test_lig_0.pdb") # After ligand refinement has completed, let's take a look at the output `.pdb` file using the `py3Dmol` module: # In[9]: chE = pyrosetta.rosetta.core.select.residue_selector.ChainSelector("E") view = viewer.init(pose) view.add(viewer.setStyle()) view.add(viewer.setStyle(command=({"hetflag": True}, {"stick": {"colorscheme": "brownCarbon", "radius": 0.2}}))) view.add(viewer.setSurface(residue_selector=chE, opacity=0.7, color='white')) view.add(viewer.setHydrogenBonds()) view() # *Coding challenge:* # # Below, write an alternate version of the function `sample_ligand_interface` called `ligand_refinement_from_command_line.py` with the following modifications: # 1. Load ligands into the Rosetta database using the `pyrosetta.init()` method rather than by modification of the `ResidueTypeSet` database. # 2. Change the scorefunction to `talaris2014` # Run it from the command line (Note: the `optparse` module has already been added for you). # *Note*: Notice that the first line of the following cell uses the ipython magic command `%%file` which writes the remainder of the cell contents to the file `outputs/ligand_refinement_from_command_line.py`: # In[11]: get_ipython().run_cell_magic('file', 'outputs/ligand_refinement_from_command_line.py', 'import optparse\nimport os\nimport pyrosetta\n\n\ndef sample_ligand_interface(pdb_filename,\n partners,\n ligand_params=[""],\n jobs=1,\n job_output="ligand_output"):\n """\n Performs ligand-protein docking using Rosetta fullatom docking\n (DockingHighRes) on the ligand-protein complex in \n using the relative chain . If the ligand parameters\n (a .params file) are not defaultly loaded into PyRosetta,\n must supply the list of files including the ligand\n parameters. trajectories are performed with output\n structures named _(job#).pdb.\n \n Note: Global docking, a problem solved by the Rosetta DockingProtocol,\n requires interface detection and refinement as with other protocols,\n these tasks are split into centroid (interface detection) and\n high-resolution (interface refinement) methods without a centroid \n representation, low-resolution ligand-protein prediction is not\n possible and as such, only the high-resolution ligand-protein \n interface refinement is available. If you add a perturbation or \n randomization step, the high-resolution stages may fail. A perturbation\n step CAN make this a global docking algorithm however the rigid-body\n sampling preceding refinement requires extensive sampling to produce\n accurate results and this algorithm spends most of its effort in\n refinement (which may be useless for the predicted interface).\n \n This script performs ligand-protein interface structure prediction but does NOT\n perform global ligand-protein docking. Since there is no generic interface\n detection, the input PDB file must have the ligand placed near the interface\n that will be refined. If the DockMCMProtocol is applied to a pose\n without placement near the interface, then the refinement may:\n -waste steps sampling the wrong interface\n -fail by predicting an incorrect interface very far from the true interface\n -fail by separating the ligand from the protein (usually due to a clash)\n DockMCMProtocol does not require an independent randomization or perturbation\n step to "seed" its prediction.\n \n Additional refinement steps may increase the accuracy of the predicted\n conformation (see refinement.py). Drastic moves (large conformational changes)\n should be avoided; if they precede the protocol, the problems above may occur,\n if they succeed the protocol, the protocol results may be lost.\n """\n\n # Declare working directory and output directory\n working_dir = os.getcwd()\n output_dir = "outputs"\n if not os.path.exists(output_dir):\n os.mkdir(output_dir)\n \n # Initialize PyRosetta\n pyrosetta.init()\n \n # Create an empty pose from the desired PDB file\n pose = pyrosetta.rosetta.core.pose.Pose()\n\n # If the params list has contents, load .params files\n # Note: this method of adding ligands to the ResidueTypeSet is unnecessary\n # if you call pyrosetta.init("-extra_res_fa {}".format(ligand_params))\n if len(ligand_params) != 0 and ligand_params[0] != "":\n ligand_params = pyrosetta.Vector1(ligand_params)\n res_set = pose.conformation().modifiable_residue_type_set_for_conf()\n res_set.read_files_for_base_residue_types(ligand_params)\n pose.conformation().reset_residue_type_set_for_conf(res_set)\n \n # Load pdb_filename into pose\n pyrosetta.io.pose_from_file(pose, pdb_filename)\n\n # Setup the docking FoldTree\n # the method setup_foldtree takes an input pose and sets its\n # FoldTree to have jump 1 represent the relation between the two docking\n # partners, the jump points are the residues closest to the centers of\n # geometry for each partner with a cutpoint at the end of the chain,\n # the second argument is a string specifying the relative chain orientation\n # such as "A_B" of "LH_A", ONLY TWO BODY DOCKING is supported and the\n # partners MUST have different chain IDs and be in the same pose (the\n # same PDB), additional chains can be grouped with one of the partners,\n # the "_" character specifies which bodies are separated\n # the third argument...is currently unsupported but must be set (it is\n # supposed to specify which jumps are movable, to support multibody\n # docking...but Rosetta doesn\'t currently)\n # the FoldTrees setup by this method are for TWO BODY docking ONLY!\n dock_jump = 1 # jump number 1 is the inter-body jump\n pyrosetta.rosetta.protocols.docking.setup_foldtree(pose,\n partners,\n pyrosetta.Vector1([dock_jump]))\n\n # Create a copy of the pose for testing\n test_pose = pose.clone()\n\n # Create ScoreFunctions for centroid and fullatom docking\n scorefxn = pyrosetta.create_score_function("ligand")\n\n # Setup the high resolution (fullatom) docking protocol using DockMCMProtocol.\n docking = pyrosetta.rosetta.protocols.docking.DockMCMProtocol()\n # Many of its options and settings can be set using the setter methods.\n docking.set_scorefxn(scorefxn)\n\n # Change directory temporarily for output\n os.chdir(output_dir)\n \n # Setup the PyJobDistributor\n jd = pyrosetta.toolbox.py_jobdistributor.PyJobDistributor(job_output,\n jobs, scorefxn,\n compress=False)\n\n # Set the native pose so that the output scorefile contains the pose rmsd metric\n jd.native_pose = pose \n \n # Optional: setup a PyMOLObserver\n # pyrosetta.rosetta.protocols.moves.AddPyMOLObserver(test_pose, True)\n\n # Perform protein-ligand docking\n # counter = 0 # for pretty output to PyMOLObserver\n \n while not jd.job_complete:\n test_pose = pose.clone() # Reset test pose to original structure\n \n # counter += 1 # Change the pose name, for pretty output to PyMOLObserver\n # test_pose.pdb_info().name(job_output + \'_\' + str(counter))\n \n docking.apply(test_pose) # Perform docking and output to PyMOL\n\n # Write the decoy structure to disc\n jd.output_decoy(test_pose)\n \n os.chdir(working_dir)\n\nif __name__ == "__main__":\n \n # Declare parser object for managing input options\n parser = optparse.OptionParser()\n parser.add_option("--pdb_filename",\n dest="pdb_filename",\n help="The PDB file containing the ligand and protein to dock.")\n parser.add_option("--partners", \n dest="partners",\n default = "A_X",\n help="The relative chain partners for docking.")\n parser.add_option("--ligand_params",\n dest="ligand_params",\n help="The ligand residue parameter file.")\n parser.add_option("--jobs", \n dest="jobs",\n default="1",\n help="The number of jobs (trajectories) to perform.")\n parser.add_option("--job_output", \n dest="job_output",\n default = "ligand_output",\n help="The name preceding all output, output PDB files and scorefile.")\n (options, args) = parser.parse_args()\n \n # Catch input erros\n if not options.pdb_filename:\n parser.error("pdb_filename not given!")\n if not options.ligand_params:\n parser.error("ligand_params not given!")\n\n # Run ligand refinement protocol\n sample_ligand_interface(pdb_filename=options.pdb_filename,\n partners=options.partners,\n ligand_params=options.ligand_params.split(","),\n jobs=int(options.jobs),\n job_output=options.job_output)\n') # Run `outputs/ligand_refinement_from_command_line.py` from the command line within this Jupyter Notebook! # In[13]: pdb_filename = "inputs/test_lig.pdb" params_file = "inputs/TPA.gasteiger.fa.params" if not os.getenv("DEBUG"): get_ipython().run_line_magic('run', 'expected_outputs/ligand_refinement_from_command_line.py --pdb_filename {pdb_filename} --ligand_params {params_file} --partners E_X --jobs 1 --job_output test_lig_command_line') # *Additional challenge*: # # All of the default variables and parameters used above are specific to # the example with `inputs/test_lig.pdb` and `inputs/TPA.gasteiger.fa.params`, which is supposed to be simple, # straightforward, and speedy. Here is a more practical example: # # Kemp elimination has been a targeted reaction for enzyme design using Rosetta. # Suppose you want to better understand the active site of these enzymes and # decide to investigate using PyRosetta. # # 1. Download a copy of RCSB PDB file 3NZ1 (remove waters and any other HETATM) # 2. Extract the 5-Nitro-Benzotriazole substrate (preferably as a .mol2 file) (Note: using PyMOL, you can save the molecule using the .mol2 extension) # 3. Edit the PDB file removing waters, sulfate, and tartaric acid # 4. Produce the `.params` file for 5-Nitro-Benzotriazole (listed as chain X resdidue 3NY in the PDB file), lets assume the substrate is saved as a `.mol2` file named `3NY.mol2` # # >python molfile_to_params.py 3NY.mol2 -n 3NY # # - (optional) Test that the new PDB file is PyRosetta-friendly (it is) # 5. Make a directory containing: # - the PDB file for 3NZ1 (cleaned of waters and non-substrate HETATMs) lets name it `3NZ1.clean.pdb` here # - this sample script (technically not required, but otherwise the commands in 6. would change since `ligand_interface.py` wouldn't be here) # 6. Run the script from the commandline with appropriate arguments: # # >python ligand_refinement_from_command_line.py --pdb_filename 3NZ1.clean.pdb --partners A_X --ligand_params 3NY.params --jobs 400 --job_output 3NZ1_docking_output # # - The ligand `.params` file should be supplied using the temporary method (Method 1) described above since this script is setup to do this, the file `3NY.params` should have been successfully produced in step 4., if you permanently add `3NY.params` to the chemical database, you do not need to supply anything for the `--ligand_params` option. # - The partners option, `A_X` is PDB specific, if you chose to retain different chains (in step 3.) or otherwise change the chain IDs # in 3NZ1, make sure this string matches the desired chain interaction # - 400 trajectories is low, sampling docking conformations is difficult, typically thousands of (800-1000) trajectories are attempted # - This script features the PyMOLObserver (comment out to avoid using it), Monte Carlo simulations are not expected to produce kinetically meaningful results and as such, viewing the intermediates is only useful when understanding a protocol and rarely produces insight beyond the final output. Therefore, assure that you comment out the PyMOLObserver lines for large-scale sampling. # # 7. Wait for output, this will take a while (performing 400 trajectories of the DockingHighRes is intensive) # 8. Analyze the results by plotting ligand rmsd vs. total_score using the `matplotlib` module # In[ ]: # **Chapter contributors:** # # - Jason C. Klima (University of Washington; Lyell Immunopharma) # # < [Docking Moves in Rosetta](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/07.02-Docking-Moves-in-Rosetta.ipynb) | [Contents](toc.ipynb) | [Index](index.ipynb) | [Global Ligand Docking using `XMLObjects` Using the `ref2015.wts` Scorefunction](http://nbviewer.jupyter.org/github/RosettaCommons/PyRosetta.notebooks/blob/master/notebooks/08.01-Ligand-Docking-XMLObjects.ipynb) >