# 初始化
from pyrosetta import pose_from_pdb, init
init()
PyRosetta-4 2021 [Rosetta PyRosetta4.conda.mac.cxx11thread.serialization.python37.Release 2021.31+release.c7009b3115c22daa9efe2805d9d1ebba08426a54 2021-08-07T10:04:12] retrieved from: http://www.pyrosetta.org (C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team. core.init: {0} Checking for fconfig files in pwd and ./rosetta/flags core.init: {0} Rosetta version: PyRosetta4.conda.mac.cxx11thread.serialization.python37.Release r292 2021.31+release.c7009b3115c c7009b3115c22daa9efe2805d9d1ebba08426a54 http://www.pyrosetta.org 2021-08-07T10:04:12 core.init: {0} command: PyRosetta -ex1 -ex2aro -database /opt/miniconda3/lib/python3.7/site-packages/pyrosetta/database basic.random.init_random_generator: {0} 'RNG device' seed mode, using '/dev/urandom', seed=1792538074 seed_offset=0 real_seed=1792538074 thread_index=0 basic.random.init_random_generator: {0} RandomGenerator:init: Normal mode, seed=1792538074 RG_type=mt19937
SimpleMetrics是2018发布一个可以快速地提取和分析结构特征/性质的组件。
SimpleMetrics的计算内容:
SimpleMetrics根据计算方式分类:
SimpleMetrics计算器的选择
首先需要弄清楚,希望得到什么特征的计算。比如我想计算残基的可及溶剂表面积(SASA)时,我应该考虑我想计算的是每个残基的SASA,还是整体的SASA。以此来选定特定的SimpleMetrics计算器。以下用PerResidueSasaMetric作为demo引出用法和一些需要特别注意的细节。
注意1: 大部分的SimpleMetrics都支持使用ResidueSelector来指定或限定计算的范围。
注意2: 大部分的SimpleMetrics都支持使用set_output_as_pdb_nums来指定输出的编号使用PDB还是Pose编号。
from pyrosetta.rosetta.core.select.residue_selector import ResidueIndexSelector
from pyrosetta.rosetta.core.simple_metrics.per_residue_metrics import PerResidueSasaMetric
# 读取pose
pose = pose_from_pdb('./data/6LZ9_H_L.pdb')
# 定义SimpleMetrics计算器
per_residue_sasa = PerResidueSasaMetric()
sasa_sel = ResidueIndexSelector('16L-25L') # 比如计算L链上16-25号残基每个残基的sasa值
per_residue_sasa.set_residue_selector(sasa_sel)
per_residue_sasa.set_output_as_pdb_nums(True) # 输出时
core.chemical.GlobalResidueTypeSet: {0} Finished initializing fa_standard residue type set. Created 983 residue types core.chemical.GlobalResidueTypeSet: {0} Total time to initialize 0.683192 seconds. core.import_pose.import_pose: {0} File './data/6LZ9_H_L.pdb' automatically determined to be of type PDB core.conformation.Conformation: {0} Found disulfide between residues 21 94 core.conformation.Conformation: {0} current variant for 21 CYS core.conformation.Conformation: {0} current variant for 94 CYS core.conformation.Conformation: {0} current variant for 21 CYD core.conformation.Conformation: {0} current variant for 94 CYD core.conformation.Conformation: {0} Found disulfide between residues 141 206 core.conformation.Conformation: {0} current variant for 141 CYS core.conformation.Conformation: {0} current variant for 206 CYS core.conformation.Conformation: {0} current variant for 141 CYD core.conformation.Conformation: {0} current variant for 206 CYD
每个Metrics都有custom_type, 也就是Metrics的名称,是可独立命名的。 另外计算器的计算结果索引需要有一个“唯一”的索引号。默认来说,每种计算器都有自己的索引名。比如PerResidueSasaMetric计算器,他的索引名即为“res_sasa”。但是这个索引名是可以在apply时被重新自定义的:
per_residue_sasa.apply(pose, prefix='per_', suffix='') # 使用前缀"per_", 后缀为“”, 此时索引名为“per_res_sasa”
以上,metrics的计算已经完成。 特别注意: 每次计算的索引号不得有重复,否则直接报错!
# 再跑一次会怎么样?:
per_residue_sasa.apply(pose, prefix='per_', suffix='') # 使用前缀"per_", 后缀为“”, 此时索引名为“per_res_sasa”
--------------------------------------------------------------------------- RuntimeError Traceback (most recent call last) <ipython-input-4-6ebfee108fc1> in <module> 1 # 再跑一次会怎么样?: ----> 2 per_residue_sasa.apply(pose, prefix='per_', suffix='') # 使用前缀"per_", 后缀为“”, 此时索引名为“per_res_sasa” RuntimeError: File: /Volumes/MacintoshHD3/benchmark/W.fujii.release/rosetta.Fujii.release/_commits_/main/source/src/core/simple_metrics/util.cc:282 SimpleMetric error! The data of type PerResidueSasaMetric with data output tag per_res_sasa already exists! Please use the prefix/suffix settings or set a custom_type for the metric. See the documentation for more: https://www.rosettacommons.org/docs/latest/scripting_documentation/RosettaScripts/SimpleMetrics/SimpleMetrics#effective-use-of-simplemetrics. Note: If this was intentional, please set the override option to true in RunSimpleMetricsMover
# 加个后缀名:
per_residue_sasa.apply(pose, prefix='per_', suffix='_suf') # 使用前缀"per_", 后缀为“”, 此时索引名为“per_res_sasa_suf”
SimpleMetricData数据的储存与提取
在pose中,数据会储存在SimpleMetricData中,因此我们需要调用 get_sm_data(pose)来获取pose中的sm cache data,然后在通过SimpleMetricData来获取具体类型的值,主要有以下几类:
注意: 提取pose中Metric的data.key为字典的key值。但是我们需要知道每种metric的key值名称,如rmsd, sasa, residue_count等,直接使用切片将其取出,如不适用切片,默认返回所有值。
from pyrosetta.rosetta.core.simple_metrics import get_sm_data
sm_data = get_sm_data(pose)
根据分类,PerResidueSasaMetric的计算结果属于real_metric_data:
per_real_metric = sm_data.get_per_residue_real_metric_data()
print(per_real_metric)
map_std_string_std_map_unsigned_long_double_std_less_unsigned_long_std_allocator_std_pair_const_unsigned_long_double_t{per_res_sasa: {134:36.1789, 135:40.0819, 136:158.19, 137:5.63578, 138:53.4908, 139:1.02469, 140:44.6698, 141:1.44319, 142:146.688, 143:5.70025, }, per_res_sasa_suf: {134:36.1789, 135:40.0819, 136:158.19, 137:5.63578, 138:53.4908, 139:1.02469, 140:44.6698, 141:1.44319, 142:146.688, 143:5.70025, }}
计算的结果被存在一个叫map_std_string_std_map_unsigned_long_double_std_less_unsigned_long_std_allocator_std_pair_const_unsigned_long_double_t类型(真长。。)的字典中。
# 通过索引名直接获取数据:
per_real_metric['per_res_sasa']
map_unsigned_long_double{134: 36.1789, 135: 40.0819, 136: 158.19, 137: 5.63578, 138: 53.4908, 139: 1.02469, 140: 44.6698, 141: 1.44319, 142: 146.688, 143: 5.70025}
# 获取134号残基的SASA值。
per_real_metric['per_res_sasa'][134]
36.17887576545817
为了方便大家加深理解,本章节再举几个相关的例子进行说明:
#加载与SimpleMetrics调用有关的class.
from pyrosetta import pose_from_pdb, init
from pyrosetta.rosetta.core.simple_metrics.metrics import *
from pyrosetta.rosetta.core.simple_metrics.per_residue_metrics import *
from pyrosetta.rosetta.core.simple_metrics.composite_metrics import *
from pyrosetta.rosetta.core.simple_metrics import *
init()
#通过第五章ResidueSelector的学习,相信对肝细胞生长因子抗体(PDB:6LZ9)已经很熟悉了,这里依然采用这个抗体,并从PDB中读入生成pose对象。
pose = pose_from_pdb('./data/6LZ9_H_L.pdb')
PyRosetta-4 2021 [Rosetta PyRosetta4.conda.mac.cxx11thread.serialization.python37.Release 2021.31+release.c7009b3115c22daa9efe2805d9d1ebba08426a54 2021-08-07T10:04:12] retrieved from: http://www.pyrosetta.org (C) Copyright Rosetta Commons Member Institutions. Created in JHU by Sergey Lyskov and PyRosetta Team. core.init: {0} Checking for fconfig files in pwd and ./rosetta/flags core.init: {0} Rosetta version: PyRosetta4.conda.mac.cxx11thread.serialization.python37.Release r292 2021.31+release.c7009b3115c c7009b3115c22daa9efe2805d9d1ebba08426a54 http://www.pyrosetta.org 2021-08-07T10:04:12 core.init: {0} command: PyRosetta -ex1 -ex2aro -database /opt/miniconda3/lib/python3.7/site-packages/pyrosetta/database basic.random.init_random_generator: {0} 'RNG device' seed mode, using '/dev/urandom', seed=-2065888904 seed_offset=0 real_seed=-2065888904 thread_index=0 basic.random.init_random_generator: {0} RandomGenerator:init: Normal mode, seed=-2065888904 RG_type=mt19937 core.import_pose.import_pose: {0} File './data/6LZ9_H_L.pdb' automatically determined to be of type PDB core.conformation.Conformation: {0} Found disulfide between residues 21 94 core.conformation.Conformation: {0} current variant for 21 CYS core.conformation.Conformation: {0} current variant for 94 CYS core.conformation.Conformation: {0} current variant for 21 CYD core.conformation.Conformation: {0} current variant for 94 CYD core.conformation.Conformation: {0} Found disulfide between residues 141 206 core.conformation.Conformation: {0} current variant for 141 CYS core.conformation.Conformation: {0} current variant for 206 CYS core.conformation.Conformation: {0} current variant for 141 CYD core.conformation.Conformation: {0} current variant for 206 CYD
RealMetrics可以很方便地计算选择残基的某一物理量,并通过get_real_metric_data()返回实数。
这个Metric用于计算被ResidueSelector选择残基的数量。
# 比如选择抗体的重链,PDB链号为"H":
from pyrosetta.rosetta.core.select.residue_selector import ChainSelector
select_heavy_chain = ChainSelector('H')
#下边将计算选择残基的数量,这里我们将前边提到的SimpleMetrics的名称和SimpleMetrics的前缀/后缀也分别简要说明。
# expample1 SimpleMetrics的名称
num = SelectedResidueCountMetric()
name = 'H_chian'
num.set_custom_type(name) #设置名称
num.set_residue_selector(select_heavy_chain)
num.apply(pose)
from pyrosetta.rosetta.core.simple_metrics import get_sm_data
sm_data = get_sm_data(pose)
real_metric = sm_data.get_real_metric_data()
real_metric['H_chian_selection_count']
118.0
# expample2 SimpleMetrics的前缀/后缀
num = SelectedResidueCountMetric()
num.set_residue_selector(select_heavy_chain)
prefix = 'H_chain_'
num.apply(pose, prefix)
real_metric = sm_data.get_real_metric_data()
real_metric['H_chian_selection_count']
118.0
再次提醒说明
通过比较可以发现,SimpleMetrics的名称和SimpleMetrics的前缀/后缀在apply时会有所差别,当然名称和前缀/后缀也可以同时使用。
每次运行这些名字都会保存,重复运行同一个Metric要指定不同的名字,否则会报错。
# 再来看抗体的残基基本信息:
print(pose.pdb_info())
PDB file name: ./data/6LZ9_H_L.pdb Pose Range Chain PDB Range | #Residues #Atoms 0001 -- 0081 H 0002 -- 0082 | 0081 residues; 01283 atoms 0082 -- 0082 H 0082A -- 0082A | 0001 residues; 00011 atoms 0083 -- 0083 H 0082B -- 0082B | 0001 residues; 00011 atoms 0084 -- 0084 H 0082C -- 0082C | 0001 residues; 00019 atoms 0085 -- 0102 H 0083 -- 0100 | 0018 residues; 00271 atoms 0103 -- 0103 H 0100A -- 0100A | 0001 residues; 00010 atoms 0104 -- 0104 H 0100B -- 0100B | 0001 residues; 00021 atoms 0105 -- 0105 H 0100C -- 0100C | 0001 residues; 00021 atoms 0106 -- 0106 H 0100D -- 0100D | 0001 residues; 00010 atoms 0107 -- 0107 H 0100E -- 0100E | 0001 residues; 00017 atoms 0108 -- 0118 H 0101 -- 0111 | 0011 residues; 00160 atoms 0119 -- 0223 L 0001 -- 0105 | 0105 residues; 01600 atoms TOTAL | 0223 residues; 03434 atoms
结果解读
抗体中含有H和L两条链,分别为1-118和119-223。SelectedResidueCountMetric计算结果与此一致。
计算选择残基的溶剂可及表面积。
# 比如计算L链的溶剂可及表面积。
select_light_chain = ChainSelector('L')
sasa = SasaMetric(select_light_chain)
prefix = 'L_chain_'
sasa.apply(pose,prefix)
real_metric = sm_data.get_real_metric_data()
real_metric['L_chain_sasa']
4854.614232155741
这个Metrics计算得到的结果通过get_string_metric_data来返回字符串。
返回选择残基的poseID或PDBID, 可以快速获取ResidueSelector中到底都选择了哪些残基的列表!
#指定一段残基来给出相应的ID
from pyrosetta.rosetta.core.select.residue_selector import ResidueIndexSelector
range_selector = ResidueIndexSelector('18H-24H')
selected = range_selector.apply(pose)
selected_index = SelectedResiduesMetric(range_selector)
selected_index.set_output_in_rosetta_num(True) # True: poseID, False: PDBID
prefix = 'ID_'
selected_index.apply(pose,prefix)
from pyrosetta.rosetta.core.simple_metrics import get_sm_data
sm_data = get_sm_data(pose)
string_metric = sm_data.get_string_metric_data()
string_metric['ID_selection']
'17,18,19,20,21,22,23'
#验证SelectedResiduesMetric返回值是否正确:
index_list = [index+1 for index, i in enumerate(selected) if i == 1]
print(index_list)
[17, 18, 19, 20, 21, 22, 23]
返回pymol命令: 粘贴复制后可以用于在Pymol中选择selector中的残基。
这个Metric在第五章ResidueSelectors中得到了广泛使用,可以一目了然的知道选择器选择了哪些残基。
index_selector = ResidueIndexSelector('41H,43H,45H')
pymol_selected = SelectedResiduesPyMOLMetric()
pymol_selected.set_residue_selector(index_selector)
prefix = 'index_select_'
pymol_selected.apply(pose,prefix)
string_metric = sm_data.get_string_metric_data()
string_metric['index_select_pymol_selection']
'select rosetta_sele, (chain H and resid 41,43,45)'
这个Metrics与RealMetrics功能相同,但计算得到的是每个残基的物理量。
所有的PerResidueRealMetric都可以指定输出类型为PDBID或者是poseID 利用选项output_as_pdb_nums即可。
计算每个残基的总能(或某一个能量类型如fa_atr, fa_rep, fa_elec等),返回能量的值是经过权重。通过分解总能得到每个氨基酸的能量。
这个计算器有两种使用的模式:
#expample1 计算H链11到20号残基的fa_rep类型能量
range_sel = ResidueIndexSelector('11H-20H')
from pyrosetta.rosetta.core.scoring import fa_atr, fa_rep, fa_elec
from pyrosetta import create_score_function
per_residue_energy = PerResidueEnergyMetric()
per_residue_energy.set_residue_selector(range_sel)
per_residue_energy.set_scoretype(fa_rep)
scorefxn = create_score_function('ref2015')
per_residue_energy.set_scorefunction(scorefxn)
per_residue_energy.apply(pose,'per_')
core.scoring.etable: {0} Starting energy table calculation core.scoring.etable: {0} smooth_etable: changing atr/rep split to bottom of energy well core.scoring.etable: {0} smooth_etable: spline smoothing lj etables (maxdis = 6) core.scoring.etable: {0} smooth_etable: spline smoothing solvation etables (max_dis = 6) core.scoring.etable: {0} Finished calculating energy tables. basic.io.database: {0} Database file opened: scoring/score_functions/hbonds/ref2015_params/HBPoly1D.csv basic.io.database: {0} Database file opened: scoring/score_functions/hbonds/ref2015_params/HBFadeIntervals.csv basic.io.database: {0} Database file opened: scoring/score_functions/hbonds/ref2015_params/HBEval.csv basic.io.database: {0} Database file opened: scoring/score_functions/hbonds/ref2015_params/DonStrength.csv basic.io.database: {0} Database file opened: scoring/score_functions/hbonds/ref2015_params/AccStrength.csv basic.io.database: {0} Database file opened: scoring/score_functions/rama/fd/all.ramaProb basic.io.database: {0} Database file opened: scoring/score_functions/rama/fd/prepro.ramaProb basic.io.database: {0} Database file opened: scoring/score_functions/omega/omega_ppdep.all.txt basic.io.database: {0} Database file opened: scoring/score_functions/omega/omega_ppdep.gly.txt basic.io.database: {0} Database file opened: scoring/score_functions/omega/omega_ppdep.pro.txt basic.io.database: {0} Database file opened: scoring/score_functions/omega/omega_ppdep.valile.txt basic.io.database: {0} Database file opened: scoring/score_functions/P_AA_pp/P_AA basic.io.database: {0} Database file opened: scoring/score_functions/P_AA_pp/P_AA_n core.scoring.P_AA: {0} shapovalov_lib::shap_p_aa_pp_smooth_level of 1( aka low_smooth ) got activated. basic.io.database: {0} Database file opened: scoring/score_functions/P_AA_pp/shapovalov/10deg/kappa131/a20.prop basic.io.database: {0} Database file opened: scoring/score_functions/elec_cp_reps.dat core.scoring.elec.util: {0} Read 40 countpair representative atoms core.pack.dunbrack.RotamerLibrary: {0} shapovalov_lib_fixes_enable option is true. core.pack.dunbrack.RotamerLibrary: {0} shapovalov_lib::shap_dun10_smooth_level of 1( aka lowest_smooth ) got activated. core.pack.dunbrack.RotamerLibrary: {0} Binary rotamer library selected: /opt/miniconda3/lib/python3.7/site-packages/pyrosetta/database/rotamer/shapovalov/StpDwn_0-0-0/Dunbrack10.lib.bin core.pack.dunbrack.RotamerLibrary: {0} Using Dunbrack library binary file '/opt/miniconda3/lib/python3.7/site-packages/pyrosetta/database/rotamer/shapovalov/StpDwn_0-0-0/Dunbrack10.lib.bin'. core.pack.dunbrack.RotamerLibrary: {0} Dunbrack 2010 library took 0.17968 seconds to load from binary
per_real_metric = sm_data.get_per_residue_real_metric_data()
per_real_metric['per_res_energy']
map_unsigned_long_double{10: 0.141929, 11: 0.917974, 12: 1.55712, 13: 0.765261, 14: 0.137776, 15: 2.04166, 16: 0.549227, 17: 3.96559, 18: 0.523508, 19: 3.02056}
#expample2 计算两个pose间每个残基的能量差,这里需要引入另一个pose
com_pose = pose_from_pdb('./data/3K2U_H_L.pdb')
#3K2U与6LZ9的结构是非常相似的
core.import_pose.import_pose: {0} File './data/3K2U_H_L.pdb' automatically determined to be of type PDB core.conformation.Conformation: {0} [ WARNING ] missing heavyatom: OXT on residue THR:CtermProteinFull 121 core.conformation.Conformation: {0} [ WARNING ] missing heavyatom: OXT on residue ARG:CtermProteinFull 229 core.conformation.Conformation: {0} Found disulfide between residues 22 96 core.conformation.Conformation: {0} current variant for 22 CYS core.conformation.Conformation: {0} current variant for 96 CYS core.conformation.Conformation: {0} current variant for 22 CYD core.conformation.Conformation: {0} current variant for 96 CYD core.conformation.Conformation: {0} Found disulfide between residues 144 209 core.conformation.Conformation: {0} current variant for 144 CYS core.conformation.Conformation: {0} current variant for 209 CYS core.conformation.Conformation: {0} current variant for 144 CYD core.conformation.Conformation: {0} current variant for 209 CYD
per_residue_energy = PerResidueEnergyMetric()
per_residue_energy.set_residue_selector(range_sel)
per_residue_energy.set_scoretype(fa_rep)
scorefxn = create_score_function('ref2015')
per_residue_energy.set_scorefunction(scorefxn)
per_residue_energy.set_comparison_pose(com_pose) # <-主要变化的设置在这里;
per_residue_energy.apply(pose,'com_pre_')
per_real_metric = sm_data.get_per_residue_real_metric_data()
per_real_metric['com_pre_res_energy']
map_unsigned_long_double{10: 0.0422257, 11: 0.0217457, 12: -1.09715, 13: -0.255306, 14: -1.16531, 15: 1.88455, 16: 0.385523, 17: 3.58768, 18: -1.32255, 19: 2.705}
同时返回多个物理量,并通过get_composite_real_metric_data来返回实数。
计算总能量中每一项能量值,功能与TotalEnergyMetric部分重合,但是比其方便,可以一次输出所有的值。
# 计算两个pose的H链30到50号残基的各项能量差
from pyrosetta.rosetta.core.simple_metrics.composite_metrics import CompositeEnergyMetric
composite_sel = ResidueIndexSelector('30H-50H')
composite_energy = CompositeEnergyMetric(composite_sel)
composite_energy.set_comparison_pose(com_pose) #如果不指定比较的pose,仅计算pose的各项能量
scorefxn = create_score_function('ref2015')
composite_energy.set_scorefunction(scorefxn)
composite_energy.apply(pose,'30H-50H_')
from pyrosetta.rosetta.core.simple_metrics import get_sm_data
sm_data = get_sm_data(pose)
composite_real_metric = sm_data.get_composite_real_metric_data()
composite_real_metric['30H-50H_composite_energy']
map_std_string_double{dslf_fa13: 0, fa_atr: -3.88672, fa_dun: -2.83299, fa_elec: 1.73887, fa_intra_rep: -0.0201983, fa_intra_sol_xover4: -0.675227, fa_rep: -12.926, fa_sol: 7.53877, hbond_bb_sc: 0.0272416, hbond_lr_bb: -0.36422, hbond_sc: 1.24952, hbond_sr_bb: 0.455738, lk_ball_wtd: -1.62749, omega: 3.88765, p_aa_pp: -0.0911932, pro_close: -3.84744, rama_prepro: 0.450053, ref: -1.64251, yhh_planarity: 1.2478e-07}