#!usr/bin/env python from __future__ import print_function ################################################################################ # A GENERAL EXPLANATION """ packer_task.py This sample script explains the PackerTask object frequently changed in PyRosetta protocols. It demonstrates how to set up a proper PackerTask for basic applications and applies this for sidechain repacking and design. Two additional methods are provided for generating resfiles for a PackerTask. Sidechain packing (or just packing) refers to the concept of protein sidechain conformations attaining a physically stable structure dependent upon all residues involved (such as the hydrophobic core). Van der Waals forces provide the constraints defining what sets of sidechain conformations are or are not sufficiently "packed" (hydrophobic separation). Since Rosetta sidechain conformation selection uses a fixed backbone, at least in any individual step, the process of selecting low scoring sidechain conformations (those obeying Van der Waals constraints) is termed "packing" or "repacking". Instructions: 1) ensure that your PDB file is in the current directory 2) run the script: from commandline >python D050_Packer_task.py from within python/ipython [1]: run D050_Packer_task.py Author: Evan H. Baugh revised and motivated by Robert Schleif Last updated by Boon Uranukul, 6/9/12 References: R. L. Dunbrack Jr. and F. Cohen, "Bayesian statistical analysis of protein side-chain rotamer preferences," Prot. Science 6 1661-1681 (1997). """ ################################################################################ # THE BASIC PROTOCOL, packer_task """ This sample script is setup for usage with commandline arguments, default running within a python interpreter, or for import within a python interpreter, (exposing all methods below) The method packer_task: 1. sets up a PackerTask for performing sidechain repacking 2. performs packing -display the change in score -write the packed pose to a PDB file (packed.pdb) 3. sets up a PackerTask for design on a set of residues 4. performs design -display the change in score -display the change in sequence -write the designed pose to a PDB file (designed.pdb) """ import optparse # for option sorting import rosetta.core.pack.task # for using resfiles from rosetta import * from pyrosetta import * init(extra_options = "-constant_seed") # WARNING: option '-constant_seed' is for testing only! MAKE SURE TO REMOVE IT IN PRODUCTION RUNS!!!!! import os; os.chdir('.test.output') def packer_task(pose, PDB_out = False): """ Demonstrates the syntax necessary for basic usage of the PackerTask object performs demonstrative sidechain packing and selected design using and writes structures to PDB files if is True """ # create a copy of the pose test_pose = Pose() test_pose.assign(pose) # this object is contained in PyRosetta v2.0 and above pymover = PyMOLMover() # create a standard ScoreFunction scorefxn = get_fa_scorefxn() # create_score_function_ws_patch('standard', 'score12') ############ # PackerTask # a PackerTask encodes preferences and options for sidechain packing, an # effective Rosetta methodology for changing sidechain conformations, and # design (mutation) # a PackerTask stores information on a per-residue basis # each residue may be packed or designed # PackerTasks are handled slightly differently in PyRosetta ####pose_packer = PackerTask() # this line will not work properly pose_packer = standard_packer_task(test_pose) # the pose argument tells the PackerTask how large it should be # sidechain packing "optimizes" a pose's sidechain conformations by cycling # through (Dunbrack) rotamers (sets of chi angles) at a specific residue # and selecting the rotamer which achieves the lowest score, # enumerating all possibilities for all sidechains simultaneously is # impractically expensive so the residues to be packed are individually # optimized in a "random" order # packing options include: # -"freezing" the residue, preventing it from changing conformation # -including the original sidechain conformation when determining the # lowest scoring conformation pose_packer.restrict_to_repacking() # turns off design pose_packer.or_include_current(True) # considers original conformation print( pose_packer ) # packing and design can be performed by a PackRotamersMover, it requires # a ScoreFunction, for optimizing the sidechains and a PackerTask, # setting the packing and design options packmover = protocols.simple_moves.PackRotamersMover(scorefxn, pose_packer) scorefxn(pose) # to prevent verbose output on the next line print( '\nPre packing score:', scorefxn(test_pose) ) test_pose.pdb_info().name('original') # for PyMOLMover pymover.apply(test_pose) packmover.apply(test_pose) print( 'Post packing score:', scorefxn(test_pose) ) test_pose.pdb_info().name('packed') # for PyMOLMover pymover.apply(test_pose) if PDB_out: test_pose.dump_pdb('packed.pdb') # since the PackerTask specifies how the sidechains change, it has been # extended to include sidechain constitutional changes allowing # protein design, this method of design is very similar to sidechain # packing; all rotamers of the possible mutants at a single residue # are considered and the lowest scoring conformation is selected # design options include: # -allow all amino acids # -allow all amino acids except cysteine # -allow specific amino acids # -prevent specific amino acids # -allow polar amino acids only # -prevent polar amino acids # -allow only the native amino acid # the myriad of packing and design options can be set manually or, more # commonly, using a specific file format known as a resfile # resfile syntax is explained at: # http://www.rosettacommons.org/manuals/archive/rosetta3.1_user_guide/file_resfiles.html # manually setting deign options is tedious, the methods below are handy # for creating resfiles # mutate the "middle" residues center = test_pose.total_residue() // 2 specific_design = {} for i in range(center - 2, center + 3): specific_design[i] = 'ALLAA' # write a resfile to perform these mutations generate_resfile_from_pose(test_pose, 'sample_resfile', False, specific = specific_design) # setup the design PackerTask, use the generated resfile pose_design = standard_packer_task(test_pose) rosetta.core.pack.task.parse_resfile(test_pose, pose_design, 'sample_resfile' ) print( pose_design ) # prepare a new structure test_pose.assign(pose) # perform design designmover = protocols.simple_moves.PackRotamersMover(scorefxn, pose_design) print( '\nDesign with all proteogenic amino acids at (pose numbered)\ residues', center - 2, 'to', center + 2 ) print( 'Pre-design score:', scorefxn(test_pose) ) print( 'Pre-design sequence: ...' + \ test_pose.sequence()[center - 5:center + 4] + '...' ) designmover.apply(test_pose) # perform design print( '\nPost-design score:', scorefxn(test_pose) ) print( 'Post-design sequence: ...' + \ test_pose.sequence()[center - 5:center + 4] + '...' ) test_pose.pdb_info().name( 'designed' ) # for PyMOLMover pymover.apply(test_pose) if PDB_out: test_pose.dump_pdb('designed.pdb') ################################################################################ # PERIPHERAL METHODS # These methods are not called by the main protocol but address a # relevant topic, objects which work intimately with pose. # writes a specified resfile for the user, defaults to packing w/ input SC def generate_resfile_from_pose(pose, resfilename, pack = True, design = False, input_sc = True, freeze = [], specific = {}): """ Writes a resfile for named = True allows packing by default = True allows design using all amino acids by default = True allows usage of the original side chain conformation is an optional list of (pose) residue numbers to exclude (preserve the side chain conformations of these residues) is an optional dictionary with (pose) residue numbers as keys and resfile keywords as corresponding values (for setting individual residue options, it may be easier to add these numbers to freeze and edit the resfile manually) example: generate_resfile_from_pose(pose,'1YY8.resfile') See also: Pose PackRotamersMover TaskFactory """ # determine the header, default settings header = '' if pack: if not design: header += 'NATAA\n' else: header += 'ALLAA\n# ALLAA will NOT work on bridged Cysteines\n' else: header += 'NATRO\n' if input_sc: header += 'USE_INPUT_SC\n' to_write = header + 'start\n' # add list to dict for i in freeze: specific[i] = 'NATRO' # is a dictionary with keys() as pose resi numbers # and values as resfile keywords (PIKAA # use PDBInfo object to write the resfile info = pose.pdb_info() # pose_from_sequence returns empty PDBInfo, Pose() makes NULL if info and info.nres(): for i in specific.keys(): num = pose.pdb_info().number(i) chain = pose.pdb_info().chain(i) to_write += str(num).rjust(4) + str(chain).rjust(3) + ' ' + specific[i] + ' \n' else: for i in specific.keys(): num = i chain = ' ' to_write += str(num).rjust(4) + str(chain).rjust(3) + ' ' + specific[i] + ' \n' f = open(resfilename,'w') f.write(to_write) f.close() # this is silly, as with cleanCRYS, later implement a Biopy PDBParser method def generate_resfile_from_pdb(pdbfilename, resfilename, pack = True, design = False, input_sc = True, freeze = [], specific = {}): """ Writes a resfile for the PDB file named = True allows packing by default = True allows design using all amino acids by default = True allows usage of the original side chain conformation is an optional list of (pose) residue numbers to exclude (preserve the side chain conformations of these residues) is an optional dictionary with (pose) residue numbers as keys and resfile keywords as corresponding values (for setting individual residue options, it may be easier to add these numbers to freeze and edit the resfile manually) example: generate_resfile_from_pdb('1YY8.pdb','1YY8.resfile') See also: generate_resfile_from_pose Pose PackRotamersMover TaskFactory """ p = pose_from_file(pdbfilename) generate_resfile_from_pose(p, resfilename, pack,design, input_sc, freeze, specific) ################################################################################ # INTERPRETING RESULTS """ The PDB files output demonstrating the PackerTask for packing and design should be compared to the original PDB file (test_in.pdb for the sample case). Notice that packed.pdb has the original backbone conformation but many of the sidechain conformations differ. For test_in.pdb, this packing step provides the quickest visible change in the score value. Notice that designed.pdb has the original backbone conformation but the "middle" residues have different amino acids than the original sequence. If you use a PDB file other than test_in.pdb, these amino acids may preserve their identity, it depends on their score evaluation in PyRosetta and it there is an accessible lower score rotamer. Since a PackerTask can set options to due packing on some residues and design on others simultaneously, it is IMPERATIVE that you isolate your desired move. For example, if the design performed in the sample script above also repacked several other residues, would the score change be due to repacking or design? Would the amino acids "mutated" have their lowest scoring conformation? This confounding possibility is why structures must be made Rosetta friendly before applying a protocol. Many Rosetta "moves" may lower the score due to trivial structural changes instead of the protocol of interest. In some cases, design may change the rotamer, but not the amino acid identity, indicating that the new rotamer has the lowest score of all available rotamers. """ ################################################################################ # COMMANDLINE COMPATIBILITY # everything below is added to provide commandline usage, # the available options are specified below # this method: # 1. defines the available options # 2. loads in the commandline or default values # 3. calls packer_task with these values # parser object for managing input options # all defaults are for the example using "test_in.pdb" parser = optparse.OptionParser() parser.add_option('--pdb_filename', dest = 'pdb_filename', default = '../test/data/test_in.pdb', # default example PDB help = 'the PDB file containing the loop to remodel' ) parser.add_option('--PDB_out', dest = 'PDB_out', default = '', # default to False help = 'flag for dumping structures to PDB files' ) (options,args) = parser.parse_args() # PDB file option pdb_filename = options.pdb_filename # create a pose from the desired PDB file # -create an empty Pose object pose = Pose() # -load the data from pdb_file into the pose pose_from_file(pose, pdb_filename) # PDB_out flag PDB_out = bool(options.PDB_out) packer_task(pose, PDB_out)