#!usr/bin/env python from __future__ import print_function ################################################################################ # A GENERAL EXPLANATION """ pose_scoring.py This script displays methods for scoring pose objects and extracting score date. The Python syntax presented here is useful for quick investigation of Rosetta scores, including numerous statistical properties. Instructions: 1) ensure that your PDB file is in the current directory 2) run the script: from commandline >python D020_Pose_scoring.py from within python/ipython [1]: run D020_Pose_scoring.py Author: Evan H. Baugh revised and motivated by Robert Schleif Last updated by Boon Uranukul, 6/9/12 References: A. Leaver-Fay et al., "ROSETTA3: An object-oriented software suite for the simulation and design of macromolecules," Methods in Enymology 487, 548-574 (2011). """ ################################################################################ # THE BASIC PROTOCOL, pose_scoring """ This sample script is setup for usage with commandline arguments, default running within a python interpreter, or for import within a python interpreter, (exposing all methods below) The method pose_scoring: 1. scores the pose using the default fullatom (fa) ScoreFunction by: a. using get_fa_scorefxn b. using create_score_function c. using create_score_function and supply patch as second argument d. using create_score_function and patching it after-wards e. creating an empty ScoreFunction and manually setting the weights 2. outputs the ScoreFunction evaluations 3. obtains the pose's per-residue total scores 4. obtains the non-zero weights of the ScoreFunction, the active ScoreTypes 5. obtains the pose's per-residue energies for all active ScoreTypes 6. obtains the hydrogen bond information 7. creates a dictionary for per-residue hydrogen bond information 8. approximates the pose's radius of gyration 9. outputs the pose information 10. outputs information on the requested residues """ import optparse # for option sorting from rosetta import * from pyrosetta import * init(extra_options = "-constant_seed") # WARNING: option '-constant_seed' is for testing only! MAKE SURE TO REMOVE IT IN PRODUCTION RUNS!!!!! import os; os.chdir('.test.output') def pose_scoring(pose, display_residues = []): """ Extracts and displays various score evaluations on the input and its including: total score score term evaluations per-residue score radius of gyration (approximate) """ # this object is contained in PyRosetta v2.0 and above # create a PyMOLMover for exporting structures directly to PyMOL pymover = PyMOLMover() # 1. score the pose using the default full-atom (fa) ScoreFunction # a ScoreFunction is essentially a list of weights indicating which # ScoreTypes are "on" and how they are scaled when summed together # ScoreTypes define what scoring methods are performed on the pose # some ScoreTypes are centroid or fullatom specific, others are not # there are (at least) 5 ways to obtain the general fullatom ScoreFunction # the 1st (a) and 2nd (b) methods are only present since PyRosetta v2.0 # # a. this method returns the hard-coded set of weights for the standard # fullatom ScoreFunction, which is currently called "talaris2013" fa_scorefxn = get_fa_scorefxn() # b. this method returns a ScoreFunction with its weights set based on # files stored in the database/scoring/weights (.wts files) full_scorefxn = create_score_function('talaris2013') # c. this method sets the weights based on 'talaris2013.wts' and then # corrects them based on 'docking.wts_patch' ws_patch_scorefxn = create_score_function('talaris2013', 'docking') # create_score_function_ws_patch('talaris2013', 'docking') # d. this method returns a ScoreFunction with its weights set by loading # weights from 'talaris2013' followed by an adjustment by setting # weights from 'docking.wts_patch' patch_scorefxn = create_score_function('talaris2013') patch_scorefxn.apply_patch_from_file('docking') # e. here an empty ScoreFunction is created and the weights are set manually scorefxn = ScoreFunction() scorefxn.set_weight(core.scoring.fa_atr, 0.800) # full-atom attractive score scorefxn.set_weight(core.scoring.fa_rep, 0.440) # full-atom repulsive score scorefxn.set_weight(core.scoring.fa_sol, 0.750) # full-atom solvation score scorefxn.set_weight(core.scoring.fa_intra_rep, 0.004) # f.a. intraresidue rep. score scorefxn.set_weight(core.scoring.fa_elec, 0.700) # full-atom electronic score scorefxn.set_weight(core.scoring.pro_close, 1.000) # proline closure scorefxn.set_weight(core.scoring.hbond_sr_bb, 1.170) # short-range hbonding scorefxn.set_weight(core.scoring.hbond_lr_bb, 1.170) # long-range hbonding scorefxn.set_weight(core.scoring.hbond_bb_sc, 1.170) # backbone-sidechain hbonding scorefxn.set_weight(core.scoring.hbond_sc, 1.100) # sidechain-sidechain hbonding scorefxn.set_weight(core.scoring.dslf_fa13, 1.000) # disulfide full-atom score scorefxn.set_weight(core.scoring.rama, 0.200) # ramachandran score scorefxn.set_weight(core.scoring.omega, 0.500) # omega torsion score scorefxn.set_weight(core.scoring.fa_dun, 0.560) # fullatom Dunbrack rotamer score scorefxn.set_weight(core.scoring.p_aa_pp, 0.320) scorefxn.set_weight(core.scoring.ref, 1.000) # reference identity score # ScoreFunction a, b, and e above have the same weights and thus return # the same score for an input pose. Likewise, c and d should return the # same scores. # 2. output the ScoreFunction evaluations #ws_patch_scorefxn(pose) # to prevent verbose output on the next line print( '='*80 ) print( 'ScoreFunction a:', fa_scorefxn(pose) ) print( 'ScoreFunction b:', full_scorefxn(pose) ) print( 'ScoreFunction c:', ws_patch_scorefxn(pose) ) print( 'ScoreFunction d:', patch_scorefxn(pose) ) print( 'ScoreFunction e:', scorefxn(pose) ) pose_score = scorefxn(pose) # 3. obtain the pose Energies object and all the residue total scores energies = pose.energies() residue_energies = [energies.residue_total_energy(i) for i in range(1, pose.total_residue() + 1)] # 4. obtain the non-zero weights of the ScoreFunction, active ScoreTypes weights = [core.scoring.ScoreType(s) for s in range(1, int(core.scoring.end_of_score_type_enumeration) + 1) if scorefxn.weights()[core.scoring.ScoreType(s)]] # 5. obtain all the pose energies using the weights list # Energies.residue_total_energies returns an EMapVector of the unweighted # score values, here they are multiplied by their weights # remember when performing individual investigation, these are the raw # unweighted score! residue_weighted_energies_matrix = [ [energies.residue_total_energies(i)[w] * scorefxn.weights()[w] for i in range(1, pose.total_residue() + 1)] for w in weights] # Unfortunately, hydrogen bonding scores are NOT stored in the structure # returned by Energies.residue_total_energies # 6. hydrogen bonding information must be extracted separately pose_hbonds = core.scoring.hbonds.HBondSet() core.scoring.hbonds.fill_hbond_set( pose , False , pose_hbonds ) # 7. create a dictionary with the pose residue numbers as keys and the # residue hydrogen bonding information as values # hydrogen bonding information is stored as test in the form: # (donor residue) (donor atom) => (acceptor residue) (accecptor atom) |score hbond_dictionary = {} for residue in range(1, pose.total_residue() + 1): hbond_text = '' for hbond in range(1, pose_hbonds.nhbonds() + 1): hbond = pose_hbonds.hbond(hbond) acceptor_residue = hbond.acc_res() donor_residue = hbond.don_res() if residue == acceptor_residue or residue == donor_residue: hbond_text += str(donor_residue).ljust(4) + ' ' + \ str(pose.residue(donor_residue).atom_name(\ hbond.don_hatm() )).strip().ljust(4) + \ ' => ' + str(acceptor_residue).ljust(4) + ' ' + \ str(pose.residue(acceptor_residue).atom_name(\ hbond.acc_atm() )).strip().ljust(4) + \ ' |score: ' + str(hbond.energy()) + '\n' hbond_dictionary[residue] = hbond_text # 8. approximate the radius of gyration # there is an rg ScoreType in PyRosetta for performing this computation so # a ScoreFunction can be made as an Rg calculator, likewise you can # bias a structure towards more or less compact structures using this # NOTE: this is NOT the true radius of gyration for a protein, it uses # the Residue.nbr_atom coordinates to save time, this nbr_atom is the # Residue atom closest to the Residue's center of geometry RadG = ScoreFunction() RadG.set_weight(core.scoring.rg , 1) pose_radg = RadG(pose) # 9. output the pose information # the information is not expressed sequentially as it is produced because # several PyRosetta objects and methods output intermediate information # to screen, this would produce and unattractive output print( '='*80 ) print( 'Loaded from' , pose.pdb_info().name() ) print( pose.total_residue() , 'residues' ) print( 'Radius of Gyration ~' , pose_radg ) print( 'Total Rosetta Score:' , pose_score ) scorefxn.show(pose) # this object is contained in PyRosetta v2.0 and above pymover.apply(pose) pymover.send_energy(pose) # 10. output information on the requested residues for i in display_residues: print( '='*80 ) print( 'Pose numbered Residue' , i ) print( 'Total Residue Score:' , residue_energies[i-1] ) print( 'Score Breakdown:\n' + '-'*45 ) # loop over the weights, extract the scores from the matrix for w in range(len(weights)): print( '\t' + core.scoring.name_from_score_type(weights[w]).ljust(20) + ':\t' ,\ residue_weighted_energies_matrix[w][i-1] ) print( '-'*45 ) # print the hydrogen bond information print( 'Hydrogen bonds involving Residue ' + str(i) + ':' ) print( hbond_dictionary[i][:-1] ) print( '='*80 ) ################################################################################ # INTERPRETING RESULTS """ This sample script is strictly to provide example syntax, it does not perform any significant protocol and merely extracts data from a Pose and from scoring information. The sample method and PDB file presented with this sample script work without error. Since Rosetta is not entirely robust to all PDB files, several problems can occur if the methods are modified or used with new PDB files. The most likely problems could be: -if the input PDB file cannot be loaded into PyRosetta -if the residues to specifically investigate do not exist -if the input pose is not fullatom """ ################################################################################ # COMMANDLINE COMPATIBILITY # everything below is added to provide commandline usage, # the available options are specified below # this method: # 1. defines the available options # 2. loads in the commandline or default values # 3. calls pose_scoring with these values # parser object for managing input options # all defaults are for the example using "test_in.pdb" with reduced # cycles/jobs to provide results quickly parser = optparse.OptionParser() parser.add_option('--pdb_filename', dest = 'pdb_filename', default = '../test/data/test_in.pdb', # default example PDB help = 'the PDB file containing the loop to remodel') parser.add_option('--residues', dest = 'residues', default = '', # default to the median residue number help = 'the (pose numbered) residues to inspect carefully') (options,args) = parser.parse_args() # PDB file option pdb_filename = options.pdb_filename # create a pose from the desired PDB file # -create an empty Pose object pose = Pose() # -load the data from pdb_file into the pose pose_from_file(pose, pdb_filename) # default to the median residue number residues = options.residues if not options.residues: residues = [int(pose.total_residue()/2)] elif options.residues == 'all': # accept the word 'all' in place of a residue list residues = range(1, pose.total_residue() + 1) else: # please provide the residues of interest as , delimited residues = [int(r) for r in options.residues.split(',')] pose_scoring(pose, residues) ################################################################################ # ALTERNATE SCENARIOS ################################# # Obtaining and Editing PDB files """ PDB files are the keys to structural Bioinformatics and structure prediction. PDB files are most easily obtained from the RCSB website but may contain variability which makes them incompatible with PyRosetta. To obtain a new PDB file: 1) locate your protein of interest at http://www.pdb.prg/ 2) download the PDB file, using a browser this includes: a. clicking "Download Files" on the upper right b. clicking "PDB File (text)", the second option 3) Manually edit the file to remove lines which may hinder PyRosetta (use PyMOL, grep, awk, Python, Biopython, or whatever technique you prefer) Methods for downloading and generically "cleaning" PDB files should accompany future PyRosetta releases. """