%matplotlib inline
import time
import pandas as pd
import os
from matplotlib_venn import venn3
# CONSTANTS
# Switch read by debug(): its arguments are printed only while this is truthy.
DEBUG = True
# Relative path ("../databases") under which every input data file is looked up.
DBDIR = os.path.join("..", "databases")
# HELPER FUNCTIONS
def debug(*args):
    """
    Print the given arguments, but only while the module-level DEBUG flag is truthy.

    The arguments are handed to print() unchanged; nothing is returned.
    """
    if not DEBUG:
        return
    print(*args)
def timed(function):
    """
    Decorator that prints how long each call to `function` takes.

    The wrapper forwards all positional and keyword arguments, prints the
    elapsed time as "<seconds> seconds" once the call has returned, and
    passes the wrapped function's return value through unchanged.
    If `function` raises, the exception propagates and nothing is printed.
    """
    # Function-scope import so the decorator does not rely on a file-level import.
    from functools import wraps

    # wraps() keeps the decorated function's __name__, __doc__ and signature
    # metadata instead of exposing those of the wrapper.
    @wraps(function)
    def inner(*args, **kwargs):
        # perf_counter() is monotonic and high-resolution, which makes it the
        # appropriate clock for measuring short intervals.
        start_time = time.perf_counter()
        result = function(*args, **kwargs)
        print("{} seconds".format(time.perf_counter() - start_time))
        return result

    return inner
#@timed
def read_csv(path, sep='\t', filter=None, chunksize=100000, skiprows=None):
    """
    Load a delimited text file into a pandas DataFrame, chunk by chunk.

    The file is read in pieces of `chunksize` rows (100000 by default).
    When `filter` is given, each chunk is reduced to its matching rows before
    the chunks are joined, so rows that do not match are dropped right away.

    path      -- location of the file to read
    sep       -- field separator (tab by default)
    filter    -- None to keep every row, or a pair (column name, allowed values):
                 only rows whose value in that column is among the allowed
                 values are kept, e.g. ('HUGO', vip_genes) when loading eQTLs
    chunksize -- number of rows read per chunk
    skiprows  -- passed on to pandas.read_csv unchanged

    Returns the resulting DataFrame. In the filtered case the index is
    renumbered from 0. Progress messages are printed before and after reading.
    """
    print("Reading file: {}...".format(path))
    chunks = pd.read_csv(path, sep=sep, chunksize=chunksize, skiprows=skiprows)
    if filter is not None:
        kept = []
        for chunk in chunks:
            matches = chunk[filter[0]].isin(filter[1])
            kept.append(chunk[matches])
        frame = pd.concat(kept, ignore_index=True)
    else:
        frame = pd.concat(chunks)
    print("Returning {} rows from {}".format(len(frame), path))
    return frame
def read_xls(path, sheet=0, filter=None, skiprows=0):
    """
    Load one worksheet of an Excel file into a pandas DataFrame.

    path     -- location of the Excel file
    sheet    -- worksheet selector handed to pandas.read_excel (0 by default)
    filter   -- None to keep every row, or a pair (column name, allowed values):
                only rows whose value in that column is among the allowed
                values are kept, and the index is renumbered from 0
    skiprows -- passed on to pandas.read_excel unchanged

    Returns the resulting DataFrame and prints how many rows it contains.
    """
    frame = pd.read_excel(path, sheet, skiprows=skiprows)
    if filter is not None:
        matches = frame[filter[0]].isin(filter[1])
        frame = frame[matches].reset_index(drop=True)
    print("Returning {} rows from {} ({})".format(len(frame), path, sheet))
    return frame
def pass_below(df, filter):
    """
    Keep only the rows of a DataFrame whose value in one column lies below a threshold.

    `filter` is a pair (column name, threshold); a row passes when
    df[column] < threshold (strictly less than).
    E.g. pass_below(df, ('p-value', 1e-8)) returns the rows of df whose
    'p-value' is below 1e-8.

    Returns a new DataFrame with the index renumbered from 0 and prints the
    number of rows that remain. The input DataFrame is not modified.
    """
    column = filter[0]
    threshold = filter[1]
    below = df[column] < threshold
    kept = df[below].reset_index(drop=True)
    print("{} rows left after filtering by column '{}' < {}".format(len(kept), column, threshold))
    return kept
Very Important Pharmacogenes (VIP genes) are genes that are involved in drug response and have a summary in PharmGKB.
# dataframe containing the data for VIP genes:
# the rows of genes.tsv whose 'Is VIP' column is True
vip_genes_df = read_csv(os.path.join(DBDIR, 'pharmgkb clinical annotations', 'genes.tsv'), filter=('Is VIP', [True]))
# set of the VIP gene names (values of the 'Symbol' column);
# used below as the allowed-values list when filtering the other data sets
vip_genes = set(vip_genes_df['Symbol'])
# preview the first rows
vip_genes_df.head()
Reading file: ..\databases\pharmgkb clinical annotations\genes.tsv... Returning 54 rows from ..\databases\pharmgkb clinical annotations\genes.tsv
PharmGKB Accession Id | Entrez Id | Ensembl Id | Name | Symbol | Alternate Names | Alternate Symbols | Is VIP | Has Variant Annotation | Cross-references | Has CPIC Dosing Guideline | Chromosome | Chromosomal Start | Chromosomal Stop | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | PA109 | 1080 | ENSG00000001626 | cystic fibrosis transmembrane conductance regu... | CFTR | ATP-binding cassette sub-family C, member 7, | ABC35,"CFTR/MRP","MRP7","TNR-CFTR","dJ760C5.1", | True | True | HumanCycGene:HS00075,alfred:LO000194O,ctd:1080... | True | chr7 | 117110017 | 117311719 |
1 | PA117 | 1312 | ENSG00000093010 | catechol-O-methyltransferase | COMT | NaN | NaN | True | True | HumanCycGene:HS01791,alfred:LO000195P,ctd:1312... | False | chr22 | 019919263 | 019960498 |
2 | PA121 | 1548 | ENSG00000198077 | cytochrome P450, family 2, subfamily A, polype... | CYP2A6 | NaN | CPA6,"CYP2A", | True | True | HumanCycGene:HS10343,alfred:LO027806X,ctd:1548... | False | chr19 | 041346443 | 041366352 |
3 | PA123 | 1555 | ENSG00000197408 | cytochrome P450, family 2, subfamily B, polype... | CYP2B6 | NaN | CPB6,"CYPIIB6", | True | True | HumanCycGene:HS09587,ctd:1555,ensembl:ENSG0000... | False | chr19 | 041487204 | 041527301 |
4 | PA124 | 1557 | ENSG00000165841 | cytochrome P450, family 2, subfamily C, polype... | CYP2C19 | NaN | CPCJ,"P450IIC19", | True | True | HumanCycGene:HS09293,alfred:LO008778E,ctd:1557... | True | chr10 | 096512463 | 096615671 |
These SNPs come from the PharmGKB website: a list of ~25M SNPs that lie within genes and are annotated in PharmGKB. Around 70k of those SNPs correspond to VIP genes. However, the only data available in this file are the name of the SNP and identifiers for the related gene.
# SNPs in VIP genes with annotation in PharmGKB:
# rows of rsid.tsv whose 'Gene Symbols' value is one of the VIP gene symbols
# (skiprows=1 is forwarded to pandas.read_csv)
vip_snps = read_csv(os.path.join(DBDIR, 'pharmgkb clinical annotations', 'rsid.tsv'), skiprows=1, filter=('Gene Symbols', vip_genes))
# preview the first rows
vip_snps.head()
Reading file: ..\databases\pharmgkb clinical annotations\rsid.tsv... Returning 69182 rows from ..\databases\pharmgkb clinical annotations\rsid.tsv
RSID | Gene IDs | Gene Symbols | |
---|---|---|---|
0 | rs5270 | PA293 | PTGS2 |
1 | rs5271 | PA293 | PTGS2 |
2 | rs5272 | PA293 | PTGS2 |
3 | rs5273 | PA293 | PTGS2 |
4 | rs5274 | PA293 | PTGS2 |
Load PharmGKB's clinical annotations from clinicalAnnotations.csv.
# clinical annotations in PharmGKB for VIP genes:
# rows of the comma-separated clinicalAnnotations.csv whose 'gene' value is a VIP gene symbol
vip_annotations = read_csv(os.path.join(DBDIR, 'pharmgkb clinical annotations', 'clinicalAnnotations.csv'), sep=',', filter=('gene', vip_genes))
# set of the distinct values in the 'variant' column (refSNP IDs with clinical annotation)
vip_clinical_snps = set(vip_annotations.variant)
# preview the first rows
vip_annotations.head()
Reading file: ..\databases\pharmgkb clinical annotations\clinicalAnnotations.csv... Returning 673 rows from ..\databases\pharmgkb clinical annotations\clinicalAnnotations.csv
variant | gene | type | level of evidence | drugs | diseases | |
---|---|---|---|---|---|---|
0 | rs4149056 | SLCO1B1 | Toxicity/ADR | 1A | simvastatin | Muscular Diseases,Myopathy, Central Core |
1 | rs28399504 | CYP2C19 | Efficacy | 1A | clopidogrel | Acute coronary syndrome,Cardiovascular Diseases |
2 | HLA-B *57:01:01 | HLA-B | Toxicity/ADR | 1A | abacavir | Drug Hypersensitivity |
3 | rs4244285 | CYP2C19 | Efficacy | 1A | amitriptyline | NaN |
4 | CYP2C19 *1, CYP2C19 *17, CYP2C19 *2, CYP2C19 *3 | CYP2C19 | Efficacy,Toxicity/ADR | 1A | amitriptyline | NaN |
# counts of clinical annotations in VIP genes,
# grouped by the pair (gene, level of evidence)
vip_annotations_by_gene = vip_annotations.groupby(['gene', 'level of evidence'])
vip_annotations_by_gene.count().head() # summary: first rows of the per-group counts
# Alternatives (disabled) for showing every group instead of only the first rows:
#print(vip_annotations_by_gene.count().to_string()) # all values as string
#from IPython.core.display import HTML, display # all values as HTML
#display(HTML(vip_annotations_by_gene.count().to_html()))
variant | type | drugs | diseases | ||
---|---|---|---|---|---|
gene | level of evidence | ||||
ABCB1 | 2A | 3 | 3 | 3 | 2 |
3 | 86 | 85 | 86 | 71 | |
4 | 28 | 28 | 28 | 6 | |
ACE | 2A | 1 | 1 | 1 | 1 |
3 | 14 | 14 | 14 | 10 |
**Westra et al.**
The eQTLs for blood have been downloaded from http://genenetwork.nl/bloodeqtlbrowser/.
The data are in a single tsv file (2012-12-21-CisAssociationsProbeLevelFDR0.5.txt), ~900k cis-eQTLs (Trans-eQTLs don't have a gene identifier, so those are not used).
# Blood eQTLs for VIP genes: keep the rows whose 'HUGO' value is a VIP gene symbol,
# then keep only those with 'PValue' < 1e-8.
vip_blood_eqtls = pass_below(read_csv(os.path.join(DBDIR, 'eqtl-blood', '2012-12-21-CisAssociationsProbeLevelFDR0.5.txt'), filter=('HUGO', vip_genes)),
('PValue', 1e-8))
# preview the first rows
vip_blood_eqtls.head()
Reading file: ..\databases\eqtl-blood\2012-12-21-CisAssociationsProbeLevelFDR0.5.txt... Returning 2175 rows from ..\databases\eqtl-blood\2012-12-21-CisAssociationsProbeLevelFDR0.5.txt 760 rows left after filtering by column 'PValue' < 1e-08
PValue | SNPName | SNPChr | SNPChrPos | ProbeName | ProbeChr | ProbeCenterChrPos | CisTrans | SNPType | AlleleAssessed | OverallZScore | DatasetsWhereSNPProbePairIsAvailableAndPassesQC | DatasetsZScores | HUGO | FDR | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 9.813428e-198 | rs407257 | 22 | 22676550 | 7400537 | 22 | 22706401 | cis | C/G | G | -69.193035 | EGCUT,SHIP_TREND,Groningen-HT12,Groningen-H8v2... | -30.195047,-30.32998,-32.38628,-12.45152,-25.1... | GSTT1 | 0 |
1 | 9.813428e-198 | rs5760147 | 22 | 22664948 | 7400537 | 22 | 22706401 | cis | A/C | C | -59.187917 | EGCUT,SHIP_TREND,Groningen-HT12,Groningen-H8v2... | -27.385191,-25.535507,-27.544918,-9.413573,-20... | GSTT1 | 0 |
2 | 9.813428e-198 | rs4822466 | 22 | 22642204 | 7400537 | 22 | 22706401 | cis | A/G | G | -57.828173 | EGCUT,SHIP_TREND,Groningen-HT12,Groningen-H8v2... | -27.614359,-23.631924,-28.399845,-11.038388,-2... | GSTT1 | 0 |
3 | 9.813428e-198 | rs5760176 | 22 | 22732321 | 7400537 | 22 | 22706401 | cis | A/G | G | -57.720846 | EGCUT,SHIP_TREND,Groningen-HT12,Groningen-H8v2... | -26.823101,-22.730206,-28.169338,-10.8359165,-... | GSTT1 | 0 |
4 | 9.813428e-198 | rs4822458 | 22 | 22595659 | 7400537 | 22 | 22706401 | cis | T/C | C | -56.486053 | EGCUT,SHIP_TREND,Groningen-HT12,Groningen-H8v2... | -27.78793,-23.153557,-27.657444,-10.880648,-19... | GSTT1 | 0 |
Show counts of different genes in filtered blood eQTL list.
# Number of remaining blood eQTL rows per gene symbol ('HUGO' column).
vip_blood_eqtls.groupby(['HUGO'])['HUGO'].count()
HUGO ADRB2 11 BRCA1 179 CYP2J2 45 DPYD 18 F5 84 GSTP1 50 GSTT1 220 KCNH2 3 NQO1 55 PTGS2 33 SLC19A1 6 TPMT 56 Name: HUGO, dtype: int64
Schröder et al.
Genomics of ADME gene expression: mapping expression quantitative trait loci relevant for absorption, distribution, metabolism and excretion of drugs in human liver (http://www.nature.com/tpj/journal/v13/n1/full/tpj201144a.html), Table S2.
The data are in three worksheets of a single xls file (tpj201144x3.xls, with sheets A, B and C), ~1k eQTLs altogether.
def read_adme_xls(sheet):
    """
    Load one worksheet of the liver eQTL workbook tpj201144x3.xls.

    The worksheet named by `sheet` is read with read_xls using skiprows=7,
    and only rows whose 'SNP-gene HGNC' value is one of the VIP gene symbols
    are kept. Returns the resulting DataFrame.
    """
    workbook = os.path.join(DBDIR, 'eqtl-liver', 'tpj201144x3.xls')
    vip_only = ('SNP-gene HGNC', vip_genes)
    return read_xls(workbook, sheet=sheet, skiprows=7, filter=vip_only)
# Liver eQTLs for VIP genes (first liver data set): stack the VIP-gene rows of the
# three worksheets, then keep only those with 'p-value' < 1e-8.
# pass_below renumbers the index, so the repeated per-sheet indices left by
# pd.concat do not survive.
vip_liver_eqtls_1 = pass_below(pd.concat([read_adme_xls('Table S2 (A)'), read_adme_xls('Table S2 (B)'), read_adme_xls('Table S2 (C)')]),
('p-value', 1e-8))
# preview the first rows
vip_liver_eqtls_1.head()
Returning 1 rows from ..\databases\eqtl-liver\tpj201144x3.xls (Table S2 (A)) Returning 0 rows from ..\databases\eqtl-liver\tpj201144x3.xls (Table S2 (B)) Returning 4 rows from ..\databases\eqtl-liver\tpj201144x3.xls (Table S2 (C)) 5 rows left after filtering by column 'p-value' < 1e-08
Description | Description.1 | Linkage (r2) | SNP ID | SNP location | SNP-chromosome | SNP-gene HGNC | SNP-gene RefSeq ID | SNPs Seattle study | Seattle p-value | Trait HGNC | Trait RefSeq ID | Trait-chromosome | p-value | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Homo sapiens cytochrome P450, family 3, subfam... | Homo sapiens cytochrome P450, family 3, subfam... | NaN | rs10242455 | DOWNSTREAM | 7 | CYP3A5 | NM_000777.2 | rs10242455 | 3.350000e-22 | CYP3A5 | NM_000777.2 | 7 | 3.360000e-10 |
1 | Homo sapiens enolase superfamily member 1 (ENO... | Homo sapiens thymidylate synthetase (TYMS), mRNA. | NaN | rs2847153 | INTRONIC | 18 | TYMS | NM_001071.1 | NaN | NaN | ENOSF1 | NM_017512.2 | 18 | 1.430000e-11 |
2 | Homo sapiens nudix (nucleoside diphosphate lin... | Homo sapiens glutathione S-transferase pi (GST... | NaN | rs1695 | NaN | 11 | GSTP1 | NM_000852.2 | NaN | NaN | NUDT8 | NM_181843.1 | 11 | 1.940000e-14 |
3 | Homo sapiens nudix (nucleoside diphosphate lin... | Homo sapiens glutathione S-transferase pi (GST... | NaN | rs6591256 | UPSTREAM | 11 | GSTP1 | NM_000852.2 | NaN | NaN | NUDT8 | NM_181843.1 | 11 | 5.900000e-10 |
4 | Homo sapiens vitamin K epoxide reductase compl... | Homo sapiens vitamin K epoxide reductase compl... | NaN | rs7294 | 3PRIME_UTR | 16 | VKORC1 | NM_024006.4 | NaN | NaN | VKORC1 | NM_024006.4 | 16 | 1.470000e-09 |
Innocenti et al.
Identification, Replication, and Functional Fine-Mapping of Expression Quantitative Trait Loci in Primary Human Liver Tissue (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3102751/), Table S1.
The data are in a single tsv file (journal.pgen.1002078.s013), ~12k eQTLs.
# Liver eQTLs for VIP genes (second liver data set): keep the rows whose 'Gene' value
# is a VIP gene symbol, then keep only those with 'UC Best RS UC lm pvalue' < 1e-4.
# Note that this threshold (1e-4) is looser than the 1e-8 applied to the other data sets.
vip_liver_eqtls_2 = pass_below(read_csv(os.path.join(DBDIR, 'eqtl-liver', 'journal.pgen.1002078.s013'), filter=('Gene', vip_genes)),
('UC Best RS UC lm pvalue', 1e-4))
# preview the first rows
vip_liver_eqtls_2.head()
Reading file: ..\databases\eqtl-liver\journal.pgen.1002078.s013... Returning 43 rows from ..\databases\eqtl-liver\journal.pgen.1002078.s013 14 rows left after filtering by column 'UC Best RS UC lm pvalue' < 0.0001
Gene | Chr | Strand | TSS coord | UC gene cluster | UC probe count in cluster | UC probe ID | UC dbSnp131/1KG in probe count | UC Exon number | UC refseq transcript ID | ... | UC Best RS Merck alleles | UC Best RS Merck MAF | UC Best RS Merck imputed bool | UC Best RS Merck lm beta | UC Best RS Merck pvalue | UC Best RS UC n=60 simulations | UC Best RS UC genotype variance | UC Best RS UW genotype variance | UC Best RS Merck genotype variance | UC Best RS hapmap variance | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | MTHFR | 1 | - | 11788747 | 3 | 1 | A_23_P404045 | 3 | 7 | NM_005957 | ... | G.T | 0.7085 | 1 | 0.009211 | 0.286432 | 0.85 | 0.429156 | 0.274213 | 0.405950 | 0.232063 |
1 | DPYD | 1 | - | 98159203 | 1 | 2 | NaN | NaN | NaN | NaN | ... | C.T | 0.9195 | 1 | -0.020213 | 0.381781 | 0.62 | 0.160711 | 0.066100 | 0.157506 | 0.058594 |
2 | F5 | 1 | - | 167822393 | 1 | 2 | NaN | NaN | NaN | NaN | ... | A.C | 0.4035 | 1 | 0.003945 | 0.773724 | 0.94 | 0.442370 | 0.259345 | 0.469296 | 0.227500 |
3 | P2RY12 | 3 | - | 152585234 | 1 | 1 | A_23_P143902 | 2 | 2 | NM_022788 | ... | G.A | 0.5950 | 1 | -0.000442 | 0.963123 | 0.61 | 0.475411 | 0.374225 | 0.493117 | 0.232687 |
4 | SLC22A1 | 6 | + | 160462852 | 1 | 1 | A_23_P145569 | 2 | 7 | NM_153187 | ... | A.G | 0.6210 | 1 | -0.096047 | 0.000010 | 0.81 | 0.486203 | 0.468644 | 0.489380 | 0.236527 |
5 rows × 45 columns
Kabakchiev et al.
Expression Quantitative Trait Loci Analysis Identifies Associations Between Genotype and Gene Expression in Human Intestine (http://www.sciencedirect.com/science/article/pii/S001650851300293X), Supplementary Table 1.
The data are in mmc1.xls on worksheet '50 Kb', ~14k Cis eQTLs.
# Intestine eQTLs for VIP genes: keep the rows whose 'Gene' value is a VIP gene symbol,
# then keep only those with 'P-value' < 1e-8.
# No sheet is passed, so read_xls uses its default sheet=0.
# NOTE(review): the accompanying text names worksheet '50 Kb' -- confirm that
# sheet 0 of mmc1.xls is that worksheet.
vip_intestine_eqtls = pass_below(read_xls(os.path.join(DBDIR, 'eqtl-intestine', 'mmc1.xls'), filter=('Gene', vip_genes)),
('P-value', 1e-8))
# preview the first rows
vip_intestine_eqtls.head()
Returning 128 rows from ..\databases\eqtl-intestine\mmc1.xls (0) 27 rows left after filtering by column 'P-value' < 1e-08
Gene | Gene Chromosome | SNP | SNP Chromosome | SNP Position | SNP Locus | P-value | FDR P-value | |
---|---|---|---|---|---|---|---|---|
0 | VKORC1 | 16 | rs9923231 | 16 | 31107689 | 312832 | 3.709740e-21 | 3.490030e-17 |
1 | VKORC1 | 16 | rs9934438 | 16 | 31104878 | 312832 | 3.709740e-21 | 3.441560e-17 |
2 | VKORC1 | 16 | rs749671 | 16 | 31088347 | 312832 | 2.647910e-20 | 2.056600e-16 |
3 | VKORC1 | 16 | rs2359612 | 16 | 31103796 | 312832 | 3.784890e-20 | 2.633460e-16 |
4 | VKORC1 | 16 | rs14235 | 16 | 31121793 | 312845 | 6.003060e-20 | 3.855530e-16 |
def eqtl_statistics(df, rsid_column, vip_snps):
    """
    Summarise how many rows of an eQTL table refer to an already known SNP.

    df          -- DataFrame of eQTLs
    rsid_column -- name of the column in df that holds the SNP identifier
    vip_snps    -- collection of SNP identifiers to compare against

    Returns a sentence stating the total number of rows in df and the number
    of rows whose `rsid_column` value occurs in `vip_snps`. Rows are counted,
    so an identifier that appears in several rows is counted once per row.
    """
    total = len(df)
    annotated = int(df[rsid_column].isin(vip_snps).sum())
    return "Out of {} eQTLs, {} are already annotated in PharmGKB.".format(total, annotated)
# compare eQTLs and PharmGKB SNPs:
# for each data set, report how many eQTL rows carry an RSID that is listed in vip_snps.
# Each data set stores the SNP identifier under a different column name.
print("Blood:", eqtl_statistics(vip_blood_eqtls, 'SNPName', vip_snps.RSID))
print("Liver 1:", eqtl_statistics(vip_liver_eqtls_1, 'SNP ID', vip_snps.RSID))
print("Liver 2:", eqtl_statistics(vip_liver_eqtls_2, 'UC Best RSID', vip_snps.RSID))
print("Intestines:", eqtl_statistics(vip_intestine_eqtls, 'SNP', vip_snps.RSID))
Blood: Out of 760 eQTLs, 191 are already annotated in PharmGKB. Liver 1: Out of 5 eQTLs, 4 are already annotated in PharmGKB. Liver 2: Out of 14 eQTLs, 7 are already annotated in PharmGKB. Intestines: Out of 27 eQTLs, 13 are already annotated in PharmGKB.
# Distinct SNP identifiers per tissue.
blood_set = set(vip_blood_eqtls['SNPName'].tolist())
# The liver set is the union of the SNPs from both liver data sets.
liver_set = set(vip_liver_eqtls_1['SNP ID'].tolist()) | set(vip_liver_eqtls_2['UC Best RSID'])
intestine_set = set(vip_intestine_eqtls['SNP'])
# Sizes of the three sets, in the order blood, liver, intestine.
list(map(len, [blood_set, liver_set, intestine_set]))
[668, 19, 27]
# Three-set Venn diagram of the SNP sets; the labels are given in the same order as the sets.
venn3([liver_set, intestine_set, blood_set], ('Liver', 'Intestine', 'Blood'))
<matplotlib_venn._common.VennDiagram at 0x9d81748>
# SNPs present in both the intestine and the liver sets.
intestine_set.intersection(liver_set)
{'rs2303222'}
# SNPs present in both the blood and the liver sets.
blood_set.intersection(liver_set)
{'rs10489185', 'rs407257', 'rs6591256'}
# Display every SNP identifier in the blood set.
blood_set
{'rs1002286', 'rs1003784', 'rs1006771', 'rs1007888', 'rs1011620', 'rs1018743', 'rs1018744', 'rs10236214', 'rs1042714', 'rs1042717', 'rs1042718', 'rs1042719', 'rs1043284', 'rs10449720', 'rs10449721', 'rs10483080', 'rs10489179', 'rs10489185', 'rs10493268', 'rs10493269', 'rs10493271', 'rs10493272', 'rs10493273', 'rs10493274', 'rs10493890', 'rs10517', 'rs1060915', 'rs10737274', 'rs10752983', 'rs10798068', 'rs10798069', 'rs10800456', 'rs10840', 'rs10852459', 'rs10911953', 'rs10919192', 'rs10919193', 'rs11079056', 'rs11090305', 'rs1112183', 'rs11165783', 'rs11168066', 'rs11207542', 'rs11207543', 'rs112272', 'rs1129067', 'rs11572227', 'rs11572235', 'rs11572305', 'rs11572307', 'rs11572321', 'rs11601325', 'rs11603991', 'rs11650913', 'rs11651623', 'rs11653069', 'rs11653231', 'rs11653253', 'rs11653460', 'rs11656097', 'rs11657053', 'rs11657835', 'rs11657883', 'rs11658754', 'rs11703080', 'rs11703421', 'rs11704097', 'rs11704313', 'rs11705032', 'rs11862697', 'rs11871636', 'rs11912715', 'rs11912883', 'rs11913427', 'rs11959615', 'rs11964992', 'rs11967409', 'rs12019048', 'rs12064238', 'rs12064248', 'rs12064386', 'rs12072111', 'rs12084893', 'rs12096779', 'rs12097898', 'rs12129287', 'rs12131397', 'rs12157360', 'rs12189658', 'rs12199316', 'rs12201199', 'rs12402407', 'rs12402701', 'rs12403625', 'rs12406092', 'rs12421329', 'rs12483377', 'rs12483553', 'rs12485146', 'rs12516', 'rs12628303', 'rs12628766', 'rs12664350', 'rs12720541', 'rs12720557', 'rs12738123', 'rs12743368', 'rs12744335', 'rs12755775', 'rs12790798', 'rs12793832', 'rs12800028', 'rs12936831', 'rs12944462', 'rs1318651', 'rs1324490', 'rs1324491', 'rs13329844', 'rs1399291', 'rs1399323', 'rs140188', 'rs140199', 'rs140289', 'rs1432622', 'rs1474590', 'rs1531514', 'rs1557570', 'rs1557572', 'rs1567832', 'rs1569479', 'rs1569480', 'rs1573277', 'rs16880220', 'rs16940', 'rs16941', 'rs16942', 'rs16958965', 'rs16958997', 'rs16959266', 'rs17003912', 'rs17003930', 'rs17003998', 'rs17004044', 'rs17004046', 'rs17004047', 'rs17004049', 
'rs17004785', 'rs17004811', 'rs17004824', 'rs17004830', 'rs17119786', 'rs17430460', 'rs17431828', 'rs17471640', 'rs17501521', 'rs17521545', 'rs17525350', 'rs176156', 'rs1799949', 'rs1799966', 'rs183341', 'rs1871034', 'rs1871042', 'rs1871043', 'rs1886330', 'rs1892714', 'rs1892715', 'rs1894697', 'rs1894698', 'rs1894702', 'rs1973646', 'rs1977064', 'rs1984309', 'rs1993839', 'rs1997664', 'rs2000467', 'rs2000469', 'rs2000470', 'rs2012124', 'rs2025500', 'rs2037075', 'rs2040442', 'rs2040444', 'rs20417', 'rs2051198', 'rs2056048', 'rs2070456', 'rs2070460', 'rs2070464', 'rs2070467', 'rs2070834', 'rs2073387', 'rs2073388', 'rs2073389', 'rs2073390', 'rs2073391', 'rs2073395', 'rs2076074', 'rs2082382', 'rs2082395', 'rs2088747', 'rs2116715', 'rs2137975', 'rs2142760', 'rs2143417', 'rs214599', 'rs214600', 'rs214606', 'rs214607', 'rs214610', 'rs214611', 'rs214613', 'rs214614', 'rs214617', 'rs214619', 'rs214620', 'rs214621', 'rs214622', 'rs2154593', 'rs2154594', 'rs2175957', 'rs2186364', 'rs2186366', 'rs2186370', 'rs2205895', 'rs2205896', 'rs2213873', 'rs2223286', 'rs2223303', 'rs2223330', 'rs2227245', 'rs2235798', 'rs2236619', 'rs2236620', 'rs2236621', 'rs2236622', 'rs2236762', 'rs2236870', 'rs2236871', 'rs2239852', 'rs2239854', 'rs2267030', 'rs2267031', 'rs2267032', 'rs2267033', 'rs2267034', 'rs2267035', 'rs2267036', 'rs2267045', 'rs2267046', 'rs2267051', 'rs2267059', 'rs2267060', 'rs2267062', 'rs2267063', 'rs2267064', 'rs2267068', 'rs2267070', 'rs2271539', 'rs2271573', 'rs2271574', 'rs2280273', 'rs2280274', 'rs2282475', 'rs2283807', 'rs2291960', 'rs2292595', 'rs2294946', 'rs2294947', 'rs2294948', 'rs2294950', 'rs2298862', 'rs2298903', 'rs2298905', 'rs2304526', 'rs2330625', 'rs2330626', 'rs2330635', 'rs2330638', 'rs2370141', 'rs2400707', 'rs2420369', 'rs2420378', 'rs2509713', 'rs2514022', 'rs2514027', 'rs2518462', 'rs2518465', 'rs2667945', 'rs2670854', 'rs2739330', 'rs2838951', 'rs2842936', 'rs2842941', 'rs2876476', 'rs2877178', 'rs291592', 'rs291593', 'rs2917667', 'rs2917669', 
'rs2917681', 'rs2917683', 'rs2937125', 'rs2937126', 'rs2937127', 'rs2965753', 'rs2965757', 'rs2965759', 'rs3092988', 'rs3092994', 'rs323496', 'rs365237', 'rs368588', 'rs372518', 'rs3737559', 'rs3738474', 'rs3753305', 'rs3758938', 'rs3765088', 'rs3765640', 'rs3766119', 'rs3766120', 'rs3766121', 'rs3766126', 'rs3768003', 'rs3785075', 'rs3788362', 'rs3790084', 'rs3790087', 'rs3790088', 'rs3820060', 'rs382571', 'rs3826153', 'rs3826154', 'rs3884794', 'rs3917744', 'rs3917750', 'rs3917751', 'rs3917768', 'rs3917775', 'rs3917782', 'rs3917786', 'rs3917854', 'rs394328', 'rs3950989', 'rs401841', 'rs405597', 'rs407257', 'rs4072775', 'rs4074444', 'rs4084113', 'rs409918', 'rs4140655', 'rs4147581', 'rs4148959', 'rs422674', 'rs4239149', 'rs4247109', 'rs4262575', 'rs4294009', 'rs4320958', 'rs4388726', 'rs440163', 'rs4428104', 'rs443759', 'rs4442796', 'rs4449636', 'rs4461358', 'rs4521150', 'rs4540684', 'rs455055', 'rs4601132', 'rs4608327', 'rs4650701', 'rs4656188', 'rs4656687', 'rs4725984', 'rs4793187', 'rs4793191', 'rs4793194', 'rs4793197', 'rs4793213', 'rs4820571', 'rs4822441', 'rs4822442', 'rs4822450', 'rs4822451', 'rs4822453', 'rs4822454', 'rs4822458', 'rs4822461', 'rs4822466', 'rs4822469', 'rs4891', 'rs4930430', 'rs4930431', 'rs4930461', 'rs4985527', 'rs4985547', 'rs4986850', 'rs4987280', 'rs4987353', 'rs4987358', 'rs528854', 'rs5751739', 'rs5751741', 'rs5751745', 'rs5751760', 'rs5751761', 'rs5751777', 'rs5751803', 'rs5751813', 'rs575959', 'rs5760020', 'rs5760023', 'rs5760033', 'rs5760044', 'rs5760045', 'rs5760058', 'rs5760060', 'rs5760061', 'rs5760062', 'rs5760065', 'rs5760071', 'rs5760075', 'rs5760090', 'rs5760093', 'rs5760095', 'rs5760096', 'rs5760098', 'rs5760101', 'rs5760102', 'rs5760103', 'rs5760147', 'rs5760176', 'rs5760189', 'rs5760244', 'rs596603', 'rs5996631', 'rs5996633', 'rs5996634', 'rs5996635', 'rs5996651', 'rs5996663', 'rs5996674', 'rs5996675', 'rs6003904', 'rs6003907', 'rs6003909', 'rs6003933', 'rs6003939', 'rs6003959', 'rs6003970', 'rs6003980', 'rs6004011', 
'rs6004014', 'rs6004094', 'rs6028', 'rs6030', 'rs609363', 'rs611663', 'rs6127', 'rs6136', 'rs614080', 'rs625978', 'rs638140', 'rs6427202', 'rs6499249', 'rs6499259', 'rs6503721', 'rs6519476', 'rs6519501', 'rs656652', 'rs6591245', 'rs6591251', 'rs6591252', 'rs6591256', 'rs6656909', 'rs6657450', 'rs6662176', 'rs6662687', 'rs6663628', 'rs6670407', 'rs6670678', 'rs6678795', 'rs6681231', 'rs6681958', 'rs6682481', 'rs6683515', 'rs6685578', 'rs6691048', 'rs6701330', 'rs67530', 'rs684928', 'rs689452', 'rs6908777', 'rs6909725', 'rs6910196', 'rs691144', 'rs6912910', 'rs6915893', 'rs6916807', 'rs6920057', 'rs6928437', 'rs6932667', 'rs7103632', 'rs7106423', 'rs7114510', 'rs7124513', 'rs7184995', 'rs7218454', 'rs7223062', 'rs724508', 'rs726706', 'rs7278425', 'rs7291786', 'rs7359387', 'rs738794', 'rs738796', 'rs738798', 'rs738801', 'rs738803', 'rs738804', 'rs738806', 'rs738807', 'rs738809', 'rs738810', 'rs742127', 'rs743359', 'rs749174', 'rs7519192', 'rs752221', 'rs752223', 'rs7525018', 'rs7537234', 'rs7542088', 'rs7542180', 'rs7547468', 'rs7550380', 'rs7554566', 'rs760694', 'rs762803', 'rs766400', 'rs7744132', 'rs7744164', 'rs7754932', 'rs775990', 'rs7764074', 'rs7938563', 'rs7941395', 'rs7941648', 'rs7948073', 'rs7952081', 'rs799905', 'rs799906', 'rs799912', 'rs799916', 'rs799917', 'rs799923', 'rs8057938', 'rs8067269', 'rs8070085', 'rs8070179', 'rs8076790', 'rs8137222', 'rs8137732', 'rs8138555', 'rs8138673', 'rs8138769', 'rs8138827', 'rs8140489', 'rs8140505', 'rs8141314', 'rs8141342', 'rs8141627', 'rs8176091', 'rs8176092', 'rs8176104', 'rs8176126', 'rs8176140', 'rs8176193', 'rs8176194', 'rs8176198', 'rs8176199', 'rs8176234', 'rs8176242', 'rs8176256', 'rs8176257', 'rs8176265', 'rs8176273', 'rs8176289', 'rs8176296', 'rs8176297', 'rs8176318', 'rs8176322', 'rs8176323', 'rs8179309', 'rs873833', 'rs875643', 'rs879756', 'rs915590', 'rs915945', 'rs916438', 'rs917570', 'rs928782', 'rs9332542', 'rs9332546', 'rs9332553', 'rs9332554', 'rs9332575', 'rs9332630', 'rs9332640', 'rs9332665', 
'rs9367982', 'rs9436192', 'rs9436611', 'rs9436612', 'rs9477630', 'rs9477643', 'rs947895', 'rs9608184', 'rs9608193', 'rs9608194', 'rs9608195', 'rs9608196', 'rs9608199', 'rs9608201', 'rs9608202', 'rs9608205', 'rs9608216', 'rs9612450', 'rs9612452', 'rs9612453', 'rs9612459', 'rs9612460', 'rs9612473', 'rs9612475', 'rs9612476', 'rs9612478', 'rs9612481', 'rs9612483', 'rs9612485', 'rs9612496', 'rs9612498', 'rs9612503', 'rs9620328', 'rs9624335', 'rs9624364', 'rs9624387', 'rs9624412', 'rs9640171', 'rs9888980', 'rs9888981', 'rs9888993', 'rs9889219', 'rs9911630', 'rs9921652', 'rs9922223', 'rs9924271', 'rs9924953', 'rs9925037', 'rs9932486', 'rs9934681', 'rs993568', 'rs9936464', 'rs9937018', 'rs9938717', 'rs9940008'}
# Display every SNP identifier in the liver set.
liver_set
{'rs10242455', 'rs10489185', 'rs12489121', 'rs133337', 'rs1695', 'rs174697', 'rs1801131', 'rs2297595', 'rs2303222', 'rs2733753', 'rs2740558', 'rs2847153', 'rs3788190', 'rs3852872', 'rs407257', 'rs4239510', 'rs628031', 'rs6591256', 'rs7294'}
# Display every SNP identifier in the intestine set.
intestine_set
{'rs10786189', 'rs11572093', 'rs11862744', 'rs11865038', 'rs12597511', 'rs1341164', 'rs14235', 'rs1549293', 'rs1592037', 'rs17839567', 'rs1934955', 'rs1934957', 'rs1934982', 'rs1934983', 'rs1978487', 'rs2071426', 'rs2185571', 'rs2303222', 'rs2359612', 'rs2884737', 'rs4468641', 'rs6565217', 'rs6583967', 'rs749671', 'rs8050894', 'rs9923231', 'rs9934438'}