import pandas as pd
import numpy as np
# Path to the dbSNP VCF extract; it is read as a header-less, tab-separated table.
inVcfDir='/data/cellardata/users/btsui/dbsnp/Homo_sapiens/All_20170710.f1_byte2_not_00.vcf.gz'
# pandas reports mixed types for column 0 (the chromosome label) when the dtype
# is inferred chunk by chunk, so read that column as text from the start.
vcfDf=pd.read_csv(inVcfDir,sep='\t',header=None,dtype={0:str})
# The two '' entries are the columns this notebook leaves unnamed.
vcfDf.columns=['Chr','Pos','RsId','RefBase','AltBase','','','Annot']
# Builtin str replaces the np.str alias, which newer NumPy releases no longer provide.
vcfDf['Chr']=vcfDf['Chr'].astype(str)
/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py:2785: DtypeWarning: Columns (0) have mixed types. Specify dtype option on import or set low_memory=False. interactivity=interactivity, compiler=compiler, result=result)
# First 100 rows of the pickled site table after dropping repeated vcfIndex values.
top100GeneDf=(
    pd.read_pickle('./top_lgg_somatic_sites.pickle')
    .drop_duplicates(subset='vcfIndex')
    .head(100)
)
# Ad-hoc checks kept from the original cell:
#vcfDf[vcfDf.Pos==29944050]
#top100GeneDf
# Sites with fewer than 20 in rnaseq_n and an auprc below 0.5
# (a column-wise .mean(axis=0) of this selection was tried and disabled).
lowRnaseqMask=top100GeneDf['rnaseq_n'].lt(20)
lowAuprcMask=top100GeneDf['auprc'].lt(0.5)
top100GeneDf.loc[lowRnaseqMask&lowAuprcMask]
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | |
---|---|---|---|---|---|---|---|---|
70 | 0.002137 | 16 | 2.042345 | 0.495708 | (1, 237591774) | 1 | 237591774 | 3 |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 |
86 | 0.224100 | 13 | 1.728489 | 0.588009 | (3, 75630794) | 3 | 75630794 | 3 |
# Show the sites whose auprc is below 0.2.
top100GeneDf.loc[top100GeneDf['auprc'].lt(0.2)]
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | |
---|---|---|---|---|---|---|---|---|
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 |
43 | 0.072989 | 401 | 747.178357 | 0.569311 | (6, 31270232) | 6 | 31270232 | 5 |
56 | 0.035856 | 355 | 17.948375 | 0.429899 | (22, 42127537) | 22 | 42127537 | 4 |
64 | 0.025849 | 454 | 1296.377395 | 0.552955 | (6, 29944124) | 6 | 29944124 | 4 |
66 | 0.028601 | 484 | 1758.949290 | 0.509784 | (6, 31356377) | 6 | 31356377 | 4 |
69 | 0.069974 | 516 | 2769.652672 | 0.599747 | (6, 29944135) | 6 | 29944135 | 3 |
70 | 0.002137 | 16 | 2.042345 | 0.495708 | (1, 237591774) | 1 | 237591774 | 3 |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 |
# Show the sites whose auprc is below 0.1.
top100GeneDf.loc[top100GeneDf['auprc'].lt(0.1)]
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | |
---|---|---|---|---|---|---|---|---|
43 | 0.072989 | 401 | 747.178357 | 0.569311 | (6, 31270232) | 6 | 31270232 | 5 |
56 | 0.035856 | 355 | 17.948375 | 0.429899 | (22, 42127537) | 22 | 42127537 | 4 |
64 | 0.025849 | 454 | 1296.377395 | 0.552955 | (6, 29944124) | 6 | 29944124 | 4 |
66 | 0.028601 | 484 | 1758.949290 | 0.509784 | (6, 31356377) | 6 | 31356377 | 4 |
69 | 0.069974 | 516 | 2769.652672 | 0.599747 | (6, 29944135) | 6 | 29944135 | 3 |
70 | 0.002137 | 16 | 2.042345 | 0.495708 | (1, 237591774) | 1 | 237591774 | 3 |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 |
# Fraction of the selected sites with a positive rnaseq_n.
top100GeneDf['rnaseq_n'].gt(0).mean()
0.93
# List the sites from highest to lowest tcga_wxs_count.
top100GeneDf.sort_values(by='tcga_wxs_count', ascending=False)
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | |
---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 |
2 | 0.865149 | 519 | 133.375954 | 0.896410 | (2, 208248389) | 2 | 208248389 | 38 |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 |
6 | 0.954091 | 520 | 121.372849 | 0.948889 | (17, 7673802) | 17 | 7673802 | 15 |
3 | 0.991546 | 457 | 38.312977 | 0.507767 | (14, 32092134) | 14 | 32092134 | 14 |
7 | 0.738417 | 0 | 0.001931 | 0.500000 | (8, 142877758) | 8 | 142877758 | 11 |
8 | 0.989537 | 91 | 5.262948 | 0.503589 | (X, 24789042) | X | 24789042 | 11 |
9 | 0.999940 | 513 | 210.776718 | 0.996541 | (1, 109690516) | 1 | 109690516 | 10 |
10 | 0.975154 | 496 | 59.135496 | 0.999370 | (17, 7674872) | 17 | 7674872 | 10 |
11 | 0.684925 | 286 | 16.242366 | 0.613307 | (6, 29944050) | 6 | 29944050 | 9 |
13 | 0.998226 | 524 | 1291.374046 | 0.694932 | (X, 24788994) | X | 24788994 | 9 |
14 | 0.994127 | 431 | 36.320537 | 0.621622 | (12, 6018369) | 12 | 6018369 | 9 |
17 | 0.779858 | 509 | 107.205374 | 0.765568 | (17, 7675088) | 17 | 7675088 | 8 |
15 | 0.895074 | 520 | 124.636711 | 0.929366 | (17, 7674220) | 17 | 7674220 | 8 |
16 | 0.850000 | 510 | 91.690840 | 0.998394 | (17, 7675076) | 17 | 7675076 | 8 |
18 | 0.924295 | 502 | 79.051527 | 0.615436 | (12, 6018901) | 12 | 6018901 | 7 |
19 | 0.967688 | 524 | 950.933206 | 0.671795 | (6, 31271836) | 6 | 31271836 | 7 |
20 | 0.524973 | 502 | 1370.792233 | 0.456735 | (6, 31356729) | 6 | 31356729 | 7 |
21 | 0.966475 | 1 | 0.392720 | 0.500000 | (7, 117548682) | 7 | 117548682 | 7 |
22 | 0.843596 | 522 | 2902.068702 | 0.470996 | (6, 29943406) | 6 | 29943406 | 7 |
23 | 0.624032 | 480 | 2319.403475 | 0.517632 | (6, 29943422) | 6 | 29943422 | 7 |
24 | 1.000000 | 520 | 123.956023 | 1.000000 | (17, 7674221) | 17 | 7674221 | 7 |
25 | 0.976915 | 3 | 0.319915 | 0.498495 | (7, 142750675) | 7 | 142750675 | 7 |
31 | 0.999499 | 8 | 2.092742 | 0.781609 | (12, 2685853) | 12 | 2685853 | 6 |
33 | 0.867048 | 315 | 18.980583 | 0.640445 | (6, 29944118) | 6 | 29944118 | 6 |
32 | 0.961239 | 3 | 0.492366 | 0.479700 | (4, 144120554) | 4 | 144120554 | 6 |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 |
28 | 0.501689 | 472 | 84.025794 | 0.500000 | (9, 128257486) | 9 | 128257486 | 6 |
27 | 0.777019 | 518 | 121.843511 | 0.875674 | (17, 7673776) | 17 | 7673776 | 6 |
... | ... | ... | ... | ... | ... | ... | ... | ... |
92 | 1.000000 | 517 | 117.526718 | 1.000000 | (17, 7674256) | 17 | 7674256 | 3 |
93 | 0.493328 | 512 | 82.120229 | 0.692187 | (17, 7673704) | 17 | 7673704 | 3 |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 |
95 | 0.666459 | 523 | 986.114504 | 0.500538 | (8, 100709671) | 8 | 100709671 | 3 |
96 | 0.951125 | 0 | 0.047244 | 0.509217 | (3, 183959847) | 3 | 183959847 | 3 |
86 | 0.224100 | 13 | 1.728489 | 0.588009 | (3, 75630794) | 3 | 75630794 | 3 |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 |
99 | 0.870781 | 495 | 23.572519 | 0.462571 | (22, 24627926) | 22 | 24627926 | 3 |
100 | 0.754902 | 0 | 0.001957 | 0.500000 | (20, 8788776) | 20 | 8788776 | 3 |
101 | 0.289047 | 205 | 13.273622 | 0.551120 | (6, 31271875) | 6 | 31271875 | 3 |
102 | 0.318593 | 524 | 977.551527 | 0.504434 | (6, 31271839) | 6 | 31271839 | 3 |
87 | 0.549532 | 103 | 7.149510 | 0.523889 | (3, 49686483) | 3 | 49686483 | 3 |
83 | 0.965023 | 510 | 2437.399610 | 0.856497 | (6, 29942858) | 6 | 29942858 | 3 |
85 | 0.756602 | 29 | 2.614504 | 0.669605 | (3, 75630855) | 3 | 75630855 | 3 |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 |
68 | 0.380695 | 512 | 2479.959924 | 0.497330 | (6, 29944132) | 6 | 29944132 | 3 |
69 | 0.069974 | 516 | 2769.652672 | 0.599747 | (6, 29944135) | 6 | 29944135 | 3 |
70 | 0.002137 | 16 | 2.042345 | 0.495708 | (1, 237591774) | 1 | 237591774 | 3 |
71 | 0.808434 | 524 | 3416.650763 | 0.442027 | (6, 29944376) | 6 | 29944376 | 3 |
72 | 0.232668 | 524 | 3618.646947 | 0.603043 | (6, 29944151) | 6 | 29944151 | 3 |
73 | 0.741291 | 506 | 315.198473 | 0.621487 | (6, 32664883) | 6 | 32664883 | 3 |
75 | 0.863203 | 395 | 31.936902 | 0.488386 | (10, 4999206) | 10 | 4999206 | 3 |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 |
76 | 0.815908 | 493 | 215.211832 | 0.928361 | (6, 32661393) | 6 | 32661393 | 3 |
77 | 0.380747 | 480 | 191.682692 | 0.596527 | (6, 32661384) | 6 | 32661384 | 3 |
78 | 0.778990 | 27 | 2.581081 | 0.645061 | (6, 32661333) | 6 | 32661333 | 3 |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 |
81 | 0.993934 | 523 | 1127.133588 | 0.582806 | (16, 2106849) | 16 | 2106849 | 3 |
82 | 0.995522 | 517 | 1391.395753 | 0.989221 | (6, 29942795) | 6 | 29942795 | 3 |
103 | 0.953258 | 522 | 2469.611111 | 0.582500 | (6, 31356399) | 6 | 31356399 | 3 |
100 rows × 8 columns
# Extract the first gene symbol from the INFO text.  Per the dbSNP VCF header,
# GENEINFO holds "symbol:id" pairs separated by '|', so capture everything up to
# the first ':' (or a field/pair delimiter).  The previous '\w+' pattern stopped
# at '-' and shortened hyphenated symbols such as the HLA genes to "HLA"; the
# raw string also avoids the invalid-escape warning for the regex.
vcfDf['GeneName']=vcfDf['Annot'].str.extract(r'GENEINFO=([^:|;]+)',expand=False)
# One gene name per (Chr, Pos); the first record wins when a position repeats.
posToGeneNameS=vcfDf.drop_duplicates(['Chr','Pos']).set_index(['Chr','Pos'])['GeneName']
# Split the vcfIndex pairs into separate key columns.  Chr is forced to str so
# it matches the string chromosome labels used as keys in vcfDf.
top100GeneDf['Chr']=[str(vcfKey[0]) for vcfKey in top100GeneDf['vcfIndex']]
top100GeneDf['Pos']=[int(vcfKey[1]) for vcfKey in top100GeneDf['vcfIndex']]
# reindex yields NaN for a site that is absent from the VCF, whereas a plain
# label lookup with missing keys raises KeyError in current pandas.
siteIndex=pd.MultiIndex.from_arrays([top100GeneDf['Chr'],top100GeneDf['Pos']])
top100GeneDf['Gene']=posToGeneNameS.reindex(siteIndex).values
top100GeneDf
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | |
---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 |
2 | 0.865149 | 519 | 133.375954 | 0.896410 | (2, 208248389) | 2 | 208248389 | 38 | IDH1 |
3 | 0.991546 | 457 | 38.312977 | 0.507767 | (14, 32092134) | 14 | 32092134 | 14 | ARHGAP5 |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 |
6 | 0.954091 | 520 | 121.372849 | 0.948889 | (17, 7673802) | 17 | 7673802 | 15 | TP53 |
7 | 0.738417 | 0 | 0.001931 | 0.500000 | (8, 142877758) | 8 | 142877758 | 11 | CYP11B1 |
8 | 0.989537 | 91 | 5.262948 | 0.503589 | (X, 24789042) | X | 24789042 | 11 | POLA1 |
9 | 0.999940 | 513 | 210.776718 | 0.996541 | (1, 109690516) | 1 | 109690516 | 10 | GSTM1 |
10 | 0.975154 | 496 | 59.135496 | 0.999370 | (17, 7674872) | 17 | 7674872 | 10 | TP53 |
11 | 0.684925 | 286 | 16.242366 | 0.613307 | (6, 29944050) | 6 | 29944050 | 9 | HLA |
13 | 0.998226 | 524 | 1291.374046 | 0.694932 | (X, 24788994) | X | 24788994 | 9 | POLA1 |
14 | 0.994127 | 431 | 36.320537 | 0.621622 | (12, 6018369) | 12 | 6018369 | 9 | VWF |
15 | 0.895074 | 520 | 124.636711 | 0.929366 | (17, 7674220) | 17 | 7674220 | 8 | TP53 |
16 | 0.850000 | 510 | 91.690840 | 0.998394 | (17, 7675076) | 17 | 7675076 | 8 | TP53 |
17 | 0.779858 | 509 | 107.205374 | 0.765568 | (17, 7675088) | 17 | 7675088 | 8 | TP53 |
18 | 0.924295 | 502 | 79.051527 | 0.615436 | (12, 6018901) | 12 | 6018901 | 7 | VWF |
19 | 0.967688 | 524 | 950.933206 | 0.671795 | (6, 31271836) | 6 | 31271836 | 7 | HLA |
20 | 0.524973 | 502 | 1370.792233 | 0.456735 | (6, 31356729) | 6 | 31356729 | 7 | MIR6891 |
21 | 0.966475 | 1 | 0.392720 | 0.500000 | (7, 117548682) | 7 | 117548682 | 7 | CFTR |
22 | 0.843596 | 522 | 2902.068702 | 0.470996 | (6, 29943406) | 6 | 29943406 | 7 | HLA |
23 | 0.624032 | 480 | 2319.403475 | 0.517632 | (6, 29943422) | 6 | 29943422 | 7 | HLA |
24 | 1.000000 | 520 | 123.956023 | 1.000000 | (17, 7674221) | 17 | 7674221 | 7 | TP53 |
25 | 0.976915 | 3 | 0.319915 | 0.498495 | (7, 142750675) | 7 | 142750675 | 7 | PRSS1 |
26 | 0.881503 | 212 | 9.790076 | 0.500000 | (7, 152238825) | 7 | 152238825 | 6 | KMT2C |
27 | 0.777019 | 518 | 121.843511 | 0.875674 | (17, 7673776) | 17 | 7673776 | 6 | TP53 |
28 | 0.501689 | 472 | 84.025794 | 0.500000 | (9, 128257486) | 9 | 128257486 | 6 | GOLGA2 |
29 | 0.962142 | 4 | 0.215953 | 0.501053 | (7, 142750600) | 7 | 142750600 | 6 | PRSS1 |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR |
31 | 0.999499 | 8 | 2.092742 | 0.781609 | (12, 2685853) | 12 | 2685853 | 6 | CACNA1C |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
73 | 0.741291 | 506 | 315.198473 | 0.621487 | (6, 32664883) | 6 | 32664883 | 3 | HLA |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 | HLA |
75 | 0.863203 | 395 | 31.936902 | 0.488386 | (10, 4999206) | 10 | 4999206 | 3 | AKR1C2 |
76 | 0.815908 | 493 | 215.211832 | 0.928361 | (6, 32661393) | 6 | 32661393 | 3 | HLA |
77 | 0.380747 | 480 | 191.682692 | 0.596527 | (6, 32661384) | 6 | 32661384 | 3 | HLA |
78 | 0.778990 | 27 | 2.581081 | 0.645061 | (6, 32661333) | 6 | 32661333 | 3 | HLA |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 | STAT6 |
81 | 0.993934 | 523 | 1127.133588 | 0.582806 | (16, 2106849) | 16 | 2106849 | 3 | MIR6511B1 |
82 | 0.995522 | 517 | 1391.395753 | 0.989221 | (6, 29942795) | 6 | 29942795 | 3 | HLA |
83 | 0.965023 | 510 | 2437.399610 | 0.856497 | (6, 29942858) | 6 | 29942858 | 3 | HLA |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 | HLA |
85 | 0.756602 | 29 | 2.614504 | 0.669605 | (3, 75630855) | 3 | 75630855 | 3 | LOC107986102 |
86 | 0.224100 | 13 | 1.728489 | 0.588009 | (3, 75630794) | 3 | 75630794 | 3 | LOC107986102 |
87 | 0.549532 | 103 | 7.149510 | 0.523889 | (3, 49686483) | 3 | 49686483 | 3 | MST1 |
88 | 0.683612 | 519 | 1443.973282 | 0.499179 | (6, 29943463) | 6 | 29943463 | 3 | HLA |
89 | 0.758810 | 35 | 3.059961 | 0.520303 | (6, 29943667) | 6 | 29943667 | 3 | HLA |
90 | 0.858112 | 523 | 66.984733 | 0.899413 | (3, 179234284) | 3 | 179234284 | 3 | PIK3CA |
91 | 0.844742 | 513 | 88.143130 | 0.874750 | (17, 7674888) | 17 | 7674888 | 3 | TP53 |
92 | 1.000000 | 517 | 117.526718 | 1.000000 | (17, 7674256) | 17 | 7674256 | 3 | TP53 |
93 | 0.493328 | 512 | 82.120229 | 0.692187 | (17, 7673704) | 17 | 7673704 | 3 | TP53 |
94 | 0.403970 | 55 | 3.786275 | 0.490953 | (17, 21416556) | 17 | 21416556 | 3 | KCNJ12 |
95 | 0.666459 | 523 | 986.114504 | 0.500538 | (8, 100709671) | 8 | 100709671 | 3 | PABPC1 |
96 | 0.951125 | 0 | 0.047244 | 0.509217 | (3, 183959847) | 3 | 183959847 | 3 | ABCC5 |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 |
99 | 0.870781 | 495 | 23.572519 | 0.462571 | (22, 24627926) | 22 | 24627926 | 3 | GGT1 |
100 | 0.754902 | 0 | 0.001957 | 0.500000 | (20, 8788776) | 20 | 8788776 | 3 | PLCB1 |
101 | 0.289047 | 205 | 13.273622 | 0.551120 | (6, 31271875) | 6 | 31271875 | 3 | HLA |
102 | 0.318593 | 524 | 977.551527 | 0.504434 | (6, 31271839) | 6 | 31271839 | 3 | HLA |
103 | 0.953258 | 522 | 2469.611111 | 0.582500 | (6, 31356399) | 6 | 31356399 | 3 | MIR6891 |
100 rows × 9 columns
# geneDf is another name for the same DataFrame object, not a copy, so columns
# added through either name appear on both.
# (A per-gene reduction, .groupby('Gene').head(n=1), was tried and disabled.)
geneDf = top100GeneDf
top100GeneDf
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | Classification | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 | Oncogene |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 | TSG |
2 | 0.865149 | 519 | 133.375954 | 0.896410 | (2, 208248389) | 2 | 208248389 | 38 | IDH1 | Oncogene |
3 | 0.991546 | 457 | 38.312977 | 0.507767 | (14, 32092134) | 14 | 32092134 | 14 | ARHGAP5 | NaN |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 | Oncogene |
6 | 0.954091 | 520 | 121.372849 | 0.948889 | (17, 7673802) | 17 | 7673802 | 15 | TP53 | TSG |
7 | 0.738417 | 0 | 0.001931 | 0.500000 | (8, 142877758) | 8 | 142877758 | 11 | CYP11B1 | NaN |
8 | 0.989537 | 91 | 5.262948 | 0.503589 | (X, 24789042) | X | 24789042 | 11 | POLA1 | NaN |
9 | 0.999940 | 513 | 210.776718 | 0.996541 | (1, 109690516) | 1 | 109690516 | 10 | GSTM1 | NaN |
10 | 0.975154 | 496 | 59.135496 | 0.999370 | (17, 7674872) | 17 | 7674872 | 10 | TP53 | TSG |
11 | 0.684925 | 286 | 16.242366 | 0.613307 | (6, 29944050) | 6 | 29944050 | 9 | HLA | NaN |
13 | 0.998226 | 524 | 1291.374046 | 0.694932 | (X, 24788994) | X | 24788994 | 9 | POLA1 | NaN |
14 | 0.994127 | 431 | 36.320537 | 0.621622 | (12, 6018369) | 12 | 6018369 | 9 | VWF | NaN |
15 | 0.895074 | 520 | 124.636711 | 0.929366 | (17, 7674220) | 17 | 7674220 | 8 | TP53 | TSG |
16 | 0.850000 | 510 | 91.690840 | 0.998394 | (17, 7675076) | 17 | 7675076 | 8 | TP53 | TSG |
17 | 0.779858 | 509 | 107.205374 | 0.765568 | (17, 7675088) | 17 | 7675088 | 8 | TP53 | TSG |
18 | 0.924295 | 502 | 79.051527 | 0.615436 | (12, 6018901) | 12 | 6018901 | 7 | VWF | NaN |
19 | 0.967688 | 524 | 950.933206 | 0.671795 | (6, 31271836) | 6 | 31271836 | 7 | HLA | NaN |
20 | 0.524973 | 502 | 1370.792233 | 0.456735 | (6, 31356729) | 6 | 31356729 | 7 | MIR6891 | NaN |
21 | 0.966475 | 1 | 0.392720 | 0.500000 | (7, 117548682) | 7 | 117548682 | 7 | CFTR | NaN |
22 | 0.843596 | 522 | 2902.068702 | 0.470996 | (6, 29943406) | 6 | 29943406 | 7 | HLA | NaN |
23 | 0.624032 | 480 | 2319.403475 | 0.517632 | (6, 29943422) | 6 | 29943422 | 7 | HLA | NaN |
24 | 1.000000 | 520 | 123.956023 | 1.000000 | (17, 7674221) | 17 | 7674221 | 7 | TP53 | TSG |
25 | 0.976915 | 3 | 0.319915 | 0.498495 | (7, 142750675) | 7 | 142750675 | 7 | PRSS1 | NaN |
26 | 0.881503 | 212 | 9.790076 | 0.500000 | (7, 152238825) | 7 | 152238825 | 6 | KMT2C | NaN |
27 | 0.777019 | 518 | 121.843511 | 0.875674 | (17, 7673776) | 17 | 7673776 | 6 | TP53 | TSG |
28 | 0.501689 | 472 | 84.025794 | 0.500000 | (9, 128257486) | 9 | 128257486 | 6 | GOLGA2 | NaN |
29 | 0.962142 | 4 | 0.215953 | 0.501053 | (7, 142750600) | 7 | 142750600 | 6 | PRSS1 | NaN |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR | Oncogene |
31 | 0.999499 | 8 | 2.092742 | 0.781609 | (12, 2685853) | 12 | 2685853 | 6 | CACNA1C | NaN |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
73 | 0.741291 | 506 | 315.198473 | 0.621487 | (6, 32664883) | 6 | 32664883 | 3 | HLA | NaN |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 | HLA | NaN |
75 | 0.863203 | 395 | 31.936902 | 0.488386 | (10, 4999206) | 10 | 4999206 | 3 | AKR1C2 | NaN |
76 | 0.815908 | 493 | 215.211832 | 0.928361 | (6, 32661393) | 6 | 32661393 | 3 | HLA | NaN |
77 | 0.380747 | 480 | 191.682692 | 0.596527 | (6, 32661384) | 6 | 32661384 | 3 | HLA | NaN |
78 | 0.778990 | 27 | 2.581081 | 0.645061 | (6, 32661333) | 6 | 32661333 | 3 | HLA | NaN |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 | STAT6 | NaN |
81 | 0.993934 | 523 | 1127.133588 | 0.582806 | (16, 2106849) | 16 | 2106849 | 3 | MIR6511B1 | NaN |
82 | 0.995522 | 517 | 1391.395753 | 0.989221 | (6, 29942795) | 6 | 29942795 | 3 | HLA | NaN |
83 | 0.965023 | 510 | 2437.399610 | 0.856497 | (6, 29942858) | 6 | 29942858 | 3 | HLA | NaN |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 | HLA | NaN |
85 | 0.756602 | 29 | 2.614504 | 0.669605 | (3, 75630855) | 3 | 75630855 | 3 | LOC107986102 | NaN |
86 | 0.224100 | 13 | 1.728489 | 0.588009 | (3, 75630794) | 3 | 75630794 | 3 | LOC107986102 | NaN |
87 | 0.549532 | 103 | 7.149510 | 0.523889 | (3, 49686483) | 3 | 49686483 | 3 | MST1 | NaN |
88 | 0.683612 | 519 | 1443.973282 | 0.499179 | (6, 29943463) | 6 | 29943463 | 3 | HLA | NaN |
89 | 0.758810 | 35 | 3.059961 | 0.520303 | (6, 29943667) | 6 | 29943667 | 3 | HLA | NaN |
90 | 0.858112 | 523 | 66.984733 | 0.899413 | (3, 179234284) | 3 | 179234284 | 3 | PIK3CA | Oncogene |
91 | 0.844742 | 513 | 88.143130 | 0.874750 | (17, 7674888) | 17 | 7674888 | 3 | TP53 | TSG |
92 | 1.000000 | 517 | 117.526718 | 1.000000 | (17, 7674256) | 17 | 7674256 | 3 | TP53 | TSG |
93 | 0.493328 | 512 | 82.120229 | 0.692187 | (17, 7673704) | 17 | 7673704 | 3 | TP53 | TSG |
94 | 0.403970 | 55 | 3.786275 | 0.490953 | (17, 21416556) | 17 | 21416556 | 3 | KCNJ12 | NaN |
95 | 0.666459 | 523 | 986.114504 | 0.500538 | (8, 100709671) | 8 | 100709671 | 3 | PABPC1 | NaN |
96 | 0.951125 | 0 | 0.047244 | 0.509217 | (3, 183959847) | 3 | 183959847 | 3 | ABCC5 | NaN |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 | TSG |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 | TSG |
99 | 0.870781 | 495 | 23.572519 | 0.462571 | (22, 24627926) | 22 | 24627926 | 3 | GGT1 | NaN |
100 | 0.754902 | 0 | 0.001957 | 0.500000 | (20, 8788776) | 20 | 8788776 | 3 | PLCB1 | NaN |
101 | 0.289047 | 205 | 13.273622 | 0.551120 | (6, 31271875) | 6 | 31271875 | 3 | HLA | NaN |
102 | 0.318593 | 524 | 977.551527 | 0.504434 | (6, 31271839) | 6 | 31271839 | 3 | HLA | NaN |
103 | 0.953258 | 522 | 2469.611111 | 0.582500 | (6, 31356399) | 6 | 31356399 | 3 | MIR6891 | NaN |
100 rows × 10 columns
# Gene-level annotation table; 'Classification*' is the label column used below.
dbsnpFlagDf=pd.read_csv('./Data/oncogene_ts.tsv',sep='\t')
geneToStatus=dbsnpFlagDf.set_index(['Gene Symbol'])['Classification*']
# Derive the labels here instead of reading geneDf.Classification: that column
# is only attached to geneDf further down, so a top-to-bottom run would fail at
# this point.  assign() works on a copy and leaves geneDf itself unchanged.
# reindex gives NaN for genes missing from the table; those rows are dropped.
# NOTE(review): assumes each gene symbol occurs once in the table — confirm.
withClassificationStatDf=geneDf.assign(
    Classification=geneToStatus.reindex(geneDf['Gene']).values
).dropna(subset=['Classification'])
import seaborn as sns
withClassificationStatDf
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | Classification | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 | Oncogene |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 | TSG |
2 | 0.865149 | 519 | 133.375954 | 0.896410 | (2, 208248389) | 2 | 208248389 | 38 | IDH1 | Oncogene |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 | Oncogene |
6 | 0.954091 | 520 | 121.372849 | 0.948889 | (17, 7673802) | 17 | 7673802 | 15 | TP53 | TSG |
10 | 0.975154 | 496 | 59.135496 | 0.999370 | (17, 7674872) | 17 | 7674872 | 10 | TP53 | TSG |
15 | 0.895074 | 520 | 124.636711 | 0.929366 | (17, 7674220) | 17 | 7674220 | 8 | TP53 | TSG |
16 | 0.850000 | 510 | 91.690840 | 0.998394 | (17, 7675076) | 17 | 7675076 | 8 | TP53 | TSG |
17 | 0.779858 | 509 | 107.205374 | 0.765568 | (17, 7675088) | 17 | 7675088 | 8 | TP53 | TSG |
24 | 1.000000 | 520 | 123.956023 | 1.000000 | (17, 7674221) | 17 | 7674221 | 7 | TP53 | TSG |
27 | 0.777019 | 518 | 121.843511 | 0.875674 | (17, 7673776) | 17 | 7673776 | 6 | TP53 | TSG |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR | Oncogene |
34 | 0.831944 | 519 | 120.805344 | 0.871324 | (17, 7674230) | 17 | 7674230 | 5 | TP53 | TSG |
35 | 0.795725 | 513 | 99.967557 | 0.869141 | (17, 7674945) | 17 | 7674945 | 5 | TP53 | TSG |
37 | 0.718190 | 464 | 182.276718 | 0.845446 | (7, 55154129) | 7 | 55154129 | 5 | EGFR | Oncogene |
53 | 0.504771 | 237 | 10.774809 | 0.500000 | (3, 179199690) | 3 | 179199690 | 4 | PIK3CA | Oncogene |
67 | 1.000000 | 516 | 115.853053 | 1.000000 | (17, 7676044) | 17 | 7676044 | 4 | TP53 | TSG |
90 | 0.858112 | 523 | 66.984733 | 0.899413 | (3, 179234284) | 3 | 179234284 | 3 | PIK3CA | Oncogene |
91 | 0.844742 | 513 | 88.143130 | 0.874750 | (17, 7674888) | 17 | 7674888 | 3 | TP53 | TSG |
92 | 1.000000 | 517 | 117.526718 | 1.000000 | (17, 7674256) | 17 | 7674256 | 3 | TP53 | TSG |
93 | 0.493328 | 512 | 82.120229 | 0.692187 | (17, 7673704) | 17 | 7673704 | 3 | TP53 | TSG |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 | TSG |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 | TSG |
# auprc values grouped by their Classification label.
g = withClassificationStatDf['auprc'].groupby(withClassificationStatDf['Classification'])
# Show the auprc values of the rows labelled 'Oncogene'.
g.get_group('Oncogene')
0 0.998211 2 0.865149 5 1.000000 30 0.116652 37 0.718190 53 0.504771 90 0.858112 Name: auprc, dtype: float64
# Box plot of auprc for each Classification label.
sns.boxplot(
    x='Classification',
    y='auprc',
    data=withClassificationStatDf,
)
<matplotlib.axes._subplots.AxesSubplot at 0x2b5682811780>
# Attach each gene's label from geneToStatus.  reindex yields NaN for genes that
# are absent from the table; .loc with missing labels is deprecated and raises
# KeyError in current pandas.
# NOTE(review): assumes each gene symbol occurs once in geneToStatus — confirm.
geneDf['Classification']=geneToStatus.reindex(geneDf['Gene']).values
/cellar/users/btsui/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:1: FutureWarning: Passing list-likes to .loc or [] with any missing label will raise KeyError in the future, you can use .reindex() as an alternative. See the documentation here: https://pandas.pydata.org/pandas-docs/stable/indexing.html#deprecate-loc-reindex-listlike """Entry point for launching an IPython kernel.
# Show only the rows that have no missing value in any column.
# (Displaying top100GeneDf itself was tried here and disabled.)
geneDf.dropna(axis=0, how='any')
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | Classification | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 | Oncogene |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 | TSG |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 | Oncogene |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR | Oncogene |
53 | 0.504771 | 237 | 10.774809 | 0.500000 | (3, 179199690) | 3 | 179199690 | 4 | PIK3CA | Oncogene |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 | TSG |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 | TSG |
# Show only the rows that have no missing value in any column.
geneDf.dropna(axis=0, how='any')
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | Classification | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 | Oncogene |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 | TSG |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 | Oncogene |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR | Oncogene |
53 | 0.504771 | 237 | 10.774809 | 0.500000 | (3, 179199690) | 3 | 179199690 | 4 | PIK3CA | Oncogene |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 | TSG |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 | TSG |
%matplotlib inline
geneDf.rocauc.hist()
<matplotlib.axes._subplots.AxesSubplot at 0x2b5683136cc0>
# Show the sites whose rocauc is strictly below 0.7.
top100GeneDf.loc[top100GeneDf['rocauc'].lt(0.7)]
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | |
---|---|---|---|---|---|---|---|---|---|
3 | 0.991546 | 457 | 38.312977 | 0.507767 | (14, 32092134) | 14 | 32092134 | 14 | ARHGAP5 |
7 | 0.738417 | 0 | 0.001931 | 0.500000 | (8, 142877758) | 8 | 142877758 | 11 | CYP11B1 |
8 | 0.989537 | 91 | 5.262948 | 0.503589 | (X, 24789042) | X | 24789042 | 11 | POLA1 |
11 | 0.684925 | 286 | 16.242366 | 0.613307 | (6, 29944050) | 6 | 29944050 | 9 | HLA |
13 | 0.998226 | 524 | 1291.374046 | 0.694932 | (X, 24788994) | X | 24788994 | 9 | POLA1 |
14 | 0.994127 | 431 | 36.320537 | 0.621622 | (12, 6018369) | 12 | 6018369 | 9 | VWF |
18 | 0.924295 | 502 | 79.051527 | 0.615436 | (12, 6018901) | 12 | 6018901 | 7 | VWF |
19 | 0.967688 | 524 | 950.933206 | 0.671795 | (6, 31271836) | 6 | 31271836 | 7 | HLA |
20 | 0.524973 | 502 | 1370.792233 | 0.456735 | (6, 31356729) | 6 | 31356729 | 7 | MIR6891 |
21 | 0.966475 | 1 | 0.392720 | 0.500000 | (7, 117548682) | 7 | 117548682 | 7 | CFTR |
22 | 0.843596 | 522 | 2902.068702 | 0.470996 | (6, 29943406) | 6 | 29943406 | 7 | HLA |
23 | 0.624032 | 480 | 2319.403475 | 0.517632 | (6, 29943422) | 6 | 29943422 | 7 | HLA |
25 | 0.976915 | 3 | 0.319915 | 0.498495 | (7, 142750675) | 7 | 142750675 | 7 | PRSS1 |
26 | 0.881503 | 212 | 9.790076 | 0.500000 | (7, 152238825) | 7 | 152238825 | 6 | KMT2C |
28 | 0.501689 | 472 | 84.025794 | 0.500000 | (9, 128257486) | 9 | 128257486 | 6 | GOLGA2 |
29 | 0.962142 | 4 | 0.215953 | 0.501053 | (7, 142750600) | 7 | 142750600 | 6 | PRSS1 |
30 | 0.116652 | 481 | 300.219466 | 0.601301 | (7, 55165350) | 7 | 55165350 | 6 | EGFR |
32 | 0.961239 | 3 | 0.492366 | 0.479700 | (4, 144120554) | 4 | 144120554 | 6 | GYPA |
33 | 0.867048 | 315 | 18.980583 | 0.640445 | (6, 29944118) | 6 | 29944118 | 6 | HLA |
36 | 0.991734 | 9 | 1.315488 | 0.507143 | (19, 14766987) | 19 | 14766987 | 5 | ADGRE2 |
38 | 0.956522 | 0 | 0.064516 | 0.500000 | (7, 127611678) | 7 | 127611678 | 5 | PAX4 |
39 | 0.981141 | 0 | 0.013699 | 0.504950 | (1, 235775088) | 1 | 235775088 | 5 | LYST |
40 | 0.922483 | 1 | 2.292089 | 0.501445 | (1, 173828313) | 1 | 173828313 | 5 | DARS2 |
41 | 0.991328 | 15 | 1.331707 | 0.523174 | (2, 166281810) | 2 | 166281810 | 5 | LOC101929680 |
42 | 0.944444 | 8 | 2.370213 | 0.500000 | (6, 32664778) | 6 | 32664778 | 5 | HLA |
43 | 0.072989 | 401 | 747.178357 | 0.569311 | (6, 31270232) | 6 | 31270232 | 5 | HLA |
44 | 0.890402 | 71 | 7.585603 | 0.508440 | (12, 52897420) | 12 | 52897420 | 5 | KRT8 |
46 | 0.468347 | 517 | 2905.948375 | 0.499899 | (6, 29942845) | 6 | 29942845 | 5 | HLA |
50 | 0.561464 | 500 | 73.971374 | 0.545052 | (12, 6018910) | 12 | 6018910 | 5 | VWF |
51 | 0.996116 | 524 | 189.522901 | 0.500495 | (5, 236441) | 5 | 236441 | 4 | SDHA |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
59 | 0.775089 | 315 | 17.183206 | 0.672237 | (6, 29944059) | 6 | 29944059 | 4 | HLA |
63 | 0.556916 | 283 | 553.902748 | 0.533268 | (6, 31270214) | 6 | 31270214 | 4 | HLA |
64 | 0.025849 | 454 | 1296.377395 | 0.552955 | (6, 29944124) | 6 | 29944124 | 4 | HLA |
66 | 0.028601 | 484 | 1758.949290 | 0.509784 | (6, 31356377) | 6 | 31356377 | 4 | MIR6891 |
68 | 0.380695 | 512 | 2479.959924 | 0.497330 | (6, 29944132) | 6 | 29944132 | 3 | HLA |
69 | 0.069974 | 516 | 2769.652672 | 0.599747 | (6, 29944135) | 6 | 29944135 | 3 | HLA |
70 | 0.002137 | 16 | 2.042345 | 0.495708 | (1, 237591774) | 1 | 237591774 | 3 | RYR2 |
71 | 0.808434 | 524 | 3416.650763 | 0.442027 | (6, 29944376) | 6 | 29944376 | 3 | HLA |
72 | 0.232668 | 524 | 3618.646947 | 0.603043 | (6, 29944151) | 6 | 29944151 | 3 | HLA |
73 | 0.741291 | 506 | 315.198473 | 0.621487 | (6, 32664883) | 6 | 32664883 | 3 | HLA |
75 | 0.863203 | 395 | 31.936902 | 0.488386 | (10, 4999206) | 10 | 4999206 | 3 | AKR1C2 |
77 | 0.380747 | 480 | 191.682692 | 0.596527 | (6, 32661384) | 6 | 32661384 | 3 | HLA |
78 | 0.778990 | 27 | 2.581081 | 0.645061 | (6, 32661333) | 6 | 32661333 | 3 | HLA |
79 | 0.007407 | 0 | 1.307692 | 0.481203 | (12, 57099758) | 12 | 57099758 | 3 | STAT6 |
81 | 0.993934 | 523 | 1127.133588 | 0.582806 | (16, 2106849) | 16 | 2106849 | 3 | MIR6511B1 |
84 | 0.061988 | 524 | 2200.085878 | 0.573877 | (6, 29942916) | 6 | 29942916 | 3 | HLA |
85 | 0.756602 | 29 | 2.614504 | 0.669605 | (3, 75630855) | 3 | 75630855 | 3 | LOC107986102 |
86 | 0.224100 | 13 | 1.728489 | 0.588009 | (3, 75630794) | 3 | 75630794 | 3 | LOC107986102 |
87 | 0.549532 | 103 | 7.149510 | 0.523889 | (3, 49686483) | 3 | 49686483 | 3 | MST1 |
88 | 0.683612 | 519 | 1443.973282 | 0.499179 | (6, 29943463) | 6 | 29943463 | 3 | HLA |
89 | 0.758810 | 35 | 3.059961 | 0.520303 | (6, 29943667) | 6 | 29943667 | 3 | HLA |
93 | 0.493328 | 512 | 82.120229 | 0.692187 | (17, 7673704) | 17 | 7673704 | 3 | TP53 |
94 | 0.403970 | 55 | 3.786275 | 0.490953 | (17, 21416556) | 17 | 21416556 | 3 | KCNJ12 |
95 | 0.666459 | 523 | 986.114504 | 0.500538 | (8, 100709671) | 8 | 100709671 | 3 | PABPC1 |
96 | 0.951125 | 0 | 0.047244 | 0.509217 | (3, 183959847) | 3 | 183959847 | 3 | ABCC5 |
99 | 0.870781 | 495 | 23.572519 | 0.462571 | (22, 24627926) | 22 | 24627926 | 3 | GGT1 |
100 | 0.754902 | 0 | 0.001957 | 0.500000 | (20, 8788776) | 20 | 8788776 | 3 | PLCB1 |
101 | 0.289047 | 205 | 13.273622 | 0.551120 | (6, 31271875) | 6 | 31271875 | 3 | HLA |
102 | 0.318593 | 524 | 977.551527 | 0.504434 | (6, 31271839) | 6 | 31271839 | 3 | HLA |
103 | 0.953258 | 522 | 2469.611111 | 0.582500 | (6, 31356399) | 6 | 31356399 | 3 | MIR6891 |
67 rows × 9 columns
# Show the sites whose rocauc is strictly above 0.7.
top100GeneDf.loc[top100GeneDf['rocauc'].gt(0.7)]
auprc | rnaseq_n | rnaseq_rd | rocauc | vcfIndex | Chr | Pos | tcga_wxs_count | Gene | |
---|---|---|---|---|---|---|---|---|---|
0 | 0.998211 | 519 | 133.822519 | 0.984290 | (2, 208248388) | 2 | 208248388 | 371 | IDH1 |
1 | 0.543616 | 520 | 122.076336 | 0.945266 | (17, 7673803) | 17 | 7673803 | 59 | TP53 |
2 | 0.865149 | 519 | 133.375954 | 0.896410 | (2, 208248389) | 2 | 208248389 | 38 | IDH1 |
5 | 1.000000 | 524 | 353.841603 | 1.000000 | (15, 90088606) | 15 | 90088606 | 15 | IDH2 |
6 | 0.954091 | 520 | 121.372849 | 0.948889 | (17, 7673802) | 17 | 7673802 | 15 | TP53 |
9 | 0.999940 | 513 | 210.776718 | 0.996541 | (1, 109690516) | 1 | 109690516 | 10 | GSTM1 |
10 | 0.975154 | 496 | 59.135496 | 0.999370 | (17, 7674872) | 17 | 7674872 | 10 | TP53 |
15 | 0.895074 | 520 | 124.636711 | 0.929366 | (17, 7674220) | 17 | 7674220 | 8 | TP53 |
16 | 0.850000 | 510 | 91.690840 | 0.998394 | (17, 7675076) | 17 | 7675076 | 8 | TP53 |
17 | 0.779858 | 509 | 107.205374 | 0.765568 | (17, 7675088) | 17 | 7675088 | 8 | TP53 |
24 | 1.000000 | 520 | 123.956023 | 1.000000 | (17, 7674221) | 17 | 7674221 | 7 | TP53 |
27 | 0.777019 | 518 | 121.843511 | 0.875674 | (17, 7673776) | 17 | 7673776 | 6 | TP53 |
31 | 0.999499 | 8 | 2.092742 | 0.781609 | (12, 2685853) | 12 | 2685853 | 6 | CACNA1C |
34 | 0.831944 | 519 | 120.805344 | 0.871324 | (17, 7674230) | 17 | 7674230 | 5 | TP53 |
35 | 0.795725 | 513 | 99.967557 | 0.869141 | (17, 7674945) | 17 | 7674945 | 5 | TP53 |
37 | 0.718190 | 464 | 182.276718 | 0.845446 | (7, 55154129) | 7 | 55154129 | 5 | EGFR |
45 | 0.559769 | 524 | 2793.311069 | 0.797976 | (6, 29942825) | 6 | 29942825 | 5 | HLA |
47 | 0.922913 | 368 | 21.582061 | 0.798079 | (6, 29944102) | 6 | 29944102 | 5 | HLA |
48 | 0.802333 | 371 | 21.608779 | 0.777366 | (6, 29944103) | 6 | 29944103 | 5 | HLA |
49 | 0.942262 | 524 | 4361.543893 | 0.960710 | (6, 29944168) | 6 | 29944168 | 5 | HLA |
60 | 0.984222 | 315 | 16.535373 | 0.803603 | (6, 29944067) | 6 | 29944067 | 4 | HLA |
62 | 0.916660 | 399 | 759.471660 | 0.916426 | (6, 31270233) | 6 | 31270233 | 4 | HLA |
65 | 0.916102 | 524 | 3459.543893 | 0.740310 | (6, 29944144) | 6 | 29944144 | 4 | HLA |
67 | 1.000000 | 516 | 115.853053 | 1.000000 | (17, 7676044) | 17 | 7676044 | 4 | TP53 |
74 | 0.026316 | 436 | 162.306796 | 0.784810 | (6, 32664926) | 6 | 32664926 | 3 | HLA |
76 | 0.815908 | 493 | 215.211832 | 0.928361 | (6, 32661393) | 6 | 32661393 | 3 | HLA |
82 | 0.995522 | 517 | 1391.395753 | 0.989221 | (6, 29942795) | 6 | 29942795 | 3 | HLA |
83 | 0.965023 | 510 | 2437.399610 | 0.856497 | (6, 29942858) | 6 | 29942858 | 3 | HLA |
90 | 0.858112 | 523 | 66.984733 | 0.899413 | (3, 179234284) | 3 | 179234284 | 3 | PIK3CA |
91 | 0.844742 | 513 | 88.143130 | 0.874750 | (17, 7674888) | 17 | 7674888 | 3 | TP53 |
92 | 1.000000 | 517 | 117.526718 | 1.000000 | (17, 7674256) | 17 | 7674256 | 3 | TP53 |
97 | 0.671406 | 501 | 94.854127 | 0.794175 | (17, 31350209) | 17 | 31350209 | 3 | NF1 |
98 | 0.530668 | 518 | 380.994275 | 0.738304 | (5, 68295269) | 5 | 68295269 | 3 | PIK3R1 |