from IPython.display import HTML
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
import seaborn
%pylab inline
Populating the interactive namespace from numpy and matplotlib
# Rank of every normalized Sequence Ontology term: a LOWER number means a
# HIGHER precedence (rank 1 wins when one variant carries several annotations).
# The order follows the way ANNOVAR defines precedence. ANNOVAR was found
# empirically to rank stop_gained above frameshift_variant, which is why those
# two are listed in the reverse of the order one might expect.
#
# Every term must appear exactly once. A repeated key in a dict literal
# silently replaces the earlier value; "intron_variant" and
# "upstream_gene_variant" used to be listed a second time with ranks 14 and 15,
# which pushed them below downstream/intergenic variants. The ranks kept here
# (10 and 11) place intronic above upstream/downstream above intergenic.
# Ranks 14 and 15 are intentionally left unused so the remaining values are
# unchanged; only the relative order (and the "< 8" coding cut-off) matters.
precedence_dict = {
    "splicing_variant": 1,
    "stop_gained": 2,
    "stop_lost": 3,
    "frameshift_variant": 4,
    "inframe_variant": 5,
    "nonsynonymous_variant": 6,
    "synonymous_variant": 7,
    "5_prime_UTR_variant": 8,
    "3_prime_UTR_variant": 9,
    "intron_variant": 10,
    "upstream_gene_variant": 11,
    "downstream_gene_variant": 12,
    "intergenic_variant": 13,
    "regulatory_region_variant": 16,
    "ignored": 17,
}
def ranked(col):
    """Return the entry of *col* that has the highest precedence.

    "Highest precedence" is the smallest rank in ``precedence_dict``. The rank
    is negated so that ``max`` selects the smallest one; when several entries
    share that rank, the first one encountered is returned.

    Raises ``KeyError`` if an entry has no rank in ``precedence_dict``.
    """
    def negated_rank(term):
        return -1 * precedence_dict[term]

    return max(col, key=negated_rank)
# Load the SnpEff annotations from the HDF5 store. pd.read_hdf opens the file,
# reads the one object and closes the file again; it replaces pd.get_store,
# which is deprecated and no longer available in later pandas releases.
snpeff_subset = pd.read_hdf('classified_variant_store.h5', "cftr_snpeff_ensembl_subset")

# SnpEff rows carry a gene symbol ("Gene_Name"); translate it to the Ensembl
# gene ID so the table can later be joined with the other tools on "Gene".
ensembl_symbol_mapping = {"CFTR":"ENSG00000001626",
                          "AC000111.3": "ENSG00000232661",
                          "AC000111.4":"ENSG00000237974",
                          "AC000111.5": "ENSG00000234001",
                          "AC000111.6": "ENSG00000083622",
                          "CTTNBP2": "ENSG00000077063",
                          "":""}
# Plain dict indexing on purpose: a symbol missing from the mapping raises
# KeyError instead of silently turning into a missing value.
snpeff_subset["EnsemblGene"] = snpeff_subset["Gene_Name"].apply(lambda x: ensembl_symbol_mapping[x])

# Step 1: for every (gene, position, ref, alt) variant, pick the
# highest-precedence normalized effect among all of its rows.
grouped_snpeff_subset = snpeff_subset.groupby(["EnsemblGene", "POS", "REF", "ALT"])
grouped_snpeff_subset = grouped_snpeff_subset.agg({"normalized_so_snpeff": ranked})
grouped_snpeff_subset = grouped_snpeff_subset.rename(columns={"normalized_so_snpeff": "normalized_so_snpeff_max"}).reset_index()

# Step 2: join the winning effect back onto the full rows and keep only the
# rows whose own effect equals the winner, so the remaining columns
# (transcript, HGVS, ...) belong to an annotation with that effect.
grouped_snpeff_subset = pd.merge(grouped_snpeff_subset, snpeff_subset, how="left", on=["POS", "REF", "ALT", "EnsemblGene"])
grouped_snpeff_subset = grouped_snpeff_subset[grouped_snpeff_subset["normalized_so_snpeff_max"] == grouped_snpeff_subset["normalized_so_snpeff"]]

# Step 3 (kludge): ties are broken by taking the first element of each group,
# i.e. effectively arbitrarily. This should only affect the transcript-level
# comparisons (HGVS etc.), not the effect itself.
# NOTE(review): GroupBy.first() skips missing values column by column by
# default, so the surviving row may combine values from different tied rows
# when some of them contain NaN -- confirm this is acceptable.
grouped_snpeff_subset = grouped_snpeff_subset.groupby(["EnsemblGene", "POS", "REF", "ALT"]).first()
agg_snpeff = grouped_snpeff_subset.reset_index()

# Drop the helper column and release the large intermediates.
del agg_snpeff["normalized_so_snpeff_max"]
del grouped_snpeff_subset
del snpeff_subset

# Use the same key column name ("Gene") as the VEP and ANNOVAR tables.
agg_snpeff.rename(columns={"EnsemblGene":"Gene"}, inplace=True)
agg_snpeff[100000:100050]
Gene | POS | REF | ALT | ID | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | |
---|---|---|---|---|---|---|---|---|---|---|
100000 | ENSG00000001626 | 117235027 | C | G | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Thr784Arg/c.2351C>G | nonsynonymous_variant |
100001 | ENSG00000001626 | 117235027 | C | T | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Thr784Ile/c.2351C>T | nonsynonymous_variant |
100002 | ENSG00000001626 | 117235027 | CA | C | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X784X/c.2352*>-A | frameshift_variant |
100003 | ENSG00000001626 | 117235027 | CAT | C | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X784X/c.2352*>-AT | frameshift_variant |
100004 | ENSG00000001626 | 117235027 | CATG | C | . | CODON_CHANGE_PLUS_CODON_DELETION | CFTR | ENST00000454343 | p.X784Thr/c.2352*>-ATG | inframe_variant |
100005 | ENSG00000001626 | 117235028 | A | AA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+A | frameshift_variant |
100006 | ENSG00000001626 | 117235028 | A | AATC | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+ATC | inframe_variant |
100007 | ENSG00000001626 | 117235028 | A | AC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+C | frameshift_variant |
100008 | ENSG00000001626 | 117235028 | A | ACA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+CA | frameshift_variant |
100009 | ENSG00000001626 | 117235028 | A | ACAG | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+CAG | inframe_variant |
100010 | ENSG00000001626 | 117235028 | A | ACG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+CG | frameshift_variant |
100011 | ENSG00000001626 | 117235028 | A | AG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+G | frameshift_variant |
100012 | ENSG00000001626 | 117235028 | A | AT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2353*>+T | frameshift_variant |
100013 | ENSG00000001626 | 117235028 | A | C | . | SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Thr784Thr/c.2352A>C | synonymous_variant |
100014 | ENSG00000001626 | 117235028 | A | G | . | SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Thr784Thr/c.2352A>G | synonymous_variant |
100015 | ENSG00000001626 | 117235028 | A | T | . | SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Thr784Thr/c.2352A>T | synonymous_variant |
100016 | ENSG00000001626 | 117235028 | AT | A | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2353*>-T | frameshift_variant |
100017 | ENSG00000001626 | 117235028 | ATG | A | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2353*>-TG | frameshift_variant |
100018 | ENSG00000001626 | 117235028 | ATGG | A | . | CODON_DELETION | CFTR | ENST00000454343 | p.Trp785X/c.2353*>-TGG | inframe_variant |
100019 | ENSG00000001626 | 117235029 | T | A | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Arg/c.2353T>A | nonsynonymous_variant |
100020 | ENSG00000001626 | 117235029 | T | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Arg/c.2353T>C | nonsynonymous_variant |
100021 | ENSG00000001626 | 117235029 | T | G | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Gly/c.2353T>G | nonsynonymous_variant |
100022 | ENSG00000001626 | 117235029 | T | TA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp785*/c.2354*>+A | stop_gained |
100023 | ENSG00000001626 | 117235029 | T | TC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+C | frameshift_variant |
100024 | ENSG00000001626 | 117235029 | T | TG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+G | frameshift_variant |
100025 | ENSG00000001626 | 117235029 | T | TGCT | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+GCT | inframe_variant |
100026 | ENSG00000001626 | 117235029 | T | TT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+T | frameshift_variant |
100027 | ENSG00000001626 | 117235029 | T | TTC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+TC | frameshift_variant |
100028 | ENSG00000001626 | 117235029 | T | TTG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+TG | frameshift_variant |
100029 | ENSG00000001626 | 117235029 | T | TTGA | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2354*>+TGA | inframe_variant |
100030 | ENSG00000001626 | 117235029 | TG | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2354*>-G | frameshift_variant |
100031 | ENSG00000001626 | 117235029 | TGG | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2354*>-GG | frameshift_variant |
100032 | ENSG00000001626 | 117235029 | TGGA | T | . | CODON_CHANGE_PLUS_CODON_DELETION | CFTR | ENST00000454343 | p.X785Tyr/c.2354*>-GGA | inframe_variant |
100033 | ENSG00000001626 | 117235030 | G | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp785*/c.2354G>A | stop_gained |
100034 | ENSG00000001626 | 117235030 | G | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Ser/c.2354G>C | nonsynonymous_variant |
100035 | ENSG00000001626 | 117235030 | G | GA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp785*/c.2355*>+A | stop_gained |
100036 | ENSG00000001626 | 117235030 | G | GAT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp785*/c.2355*>+AT | stop_gained |
100037 | ENSG00000001626 | 117235030 | G | GC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+C | frameshift_variant |
100038 | ENSG00000001626 | 117235030 | G | GG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+G | frameshift_variant |
100039 | ENSG00000001626 | 117235030 | G | GGC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+GC | frameshift_variant |
100040 | ENSG00000001626 | 117235030 | G | GT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+T | frameshift_variant |
100041 | ENSG00000001626 | 117235030 | G | GTGA | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+TGA | inframe_variant |
100042 | ENSG00000001626 | 117235030 | G | GTGC | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Trp785X/c.2355*>+TGC | inframe_variant |
100043 | ENSG00000001626 | 117235030 | G | T | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Leu/c.2354G>T | nonsynonymous_variant |
100044 | ENSG00000001626 | 117235030 | GG | G | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2355*>-G | frameshift_variant |
100045 | ENSG00000001626 | 117235030 | GGA | G | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X785X/c.2355*>-GA | frameshift_variant |
100046 | ENSG00000001626 | 117235030 | GGAA | G | . | CODON_CHANGE_PLUS_CODON_DELETION | CFTR | ENST00000454343 | p.X785Cys/c.2355*>-GAA | inframe_variant |
100047 | ENSG00000001626 | 117235031 | G | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp785*/c.2355G>A | stop_gained |
100048 | ENSG00000001626 | 117235031 | G | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000454343 | p.Trp785Cys/c.2355G>C | nonsynonymous_variant |
100049 | ENSG00000001626 | 117235031 | G | GA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Asn786X/c.2356*>+A | frameshift_variant |
50 rows × 10 columns
# Load the VEP annotations from the HDF5 store. pd.read_hdf opens the file,
# reads the one object and closes the file again; it replaces pd.get_store,
# which is deprecated and no longer available in later pandas releases.
vep_subset = pd.read_hdf('classified_variant_store.h5', "cftr_vep_ensembl_subset")

# Remove the "Feature" column and collapse the rows that become identical
# once it is gone.
# NOTE(review): "Feature" is presumably VEP's transcript identifier -- confirm.
del vep_subset["Feature"]
vep_subset.drop_duplicates(inplace=True)

# Step 1: for every (gene, position, ref, alt) variant, pick the
# highest-precedence normalized effect among all of its rows.
grouped_vep_subset = vep_subset.groupby(["Gene", "POS", "REF", "ALT"])
grouped_vep_subset = grouped_vep_subset.agg({"normalized_so_vep": ranked})
grouped_vep_subset = grouped_vep_subset.rename(columns={"normalized_so_vep": "normalized_so_vep_max"}).reset_index()

# Step 2: join the winning effect back onto the full rows and keep only the
# rows whose own effect equals the winner.
grouped_vep_subset = pd.merge(grouped_vep_subset, vep_subset, how="left", on=["POS", "REF", "ALT", "Gene"])
grouped_vep_subset = grouped_vep_subset[grouped_vep_subset["normalized_so_vep_max"] == grouped_vep_subset["normalized_so_vep"]]

# Step 3: break ties by keeping the first element of each group.
# NOTE(review): GroupBy.first() skips missing values column by column by
# default, so the surviving row may combine values from different tied rows
# when some of them contain NaN -- confirm this is acceptable.
grouped_vep_subset = grouped_vep_subset.groupby(["Gene", "POS", "REF", "ALT"]).first()
agg_vep = grouped_vep_subset.reset_index()

# Release the large intermediates and drop the helper column.
del grouped_vep_subset
del vep_subset
del agg_vep["normalized_so_vep_max"]
agg_vep[80000:80023]
Gene | POS | REF | ALT | ID | Consequence | hgvs_vep | normalized_so_vep | |
---|---|---|---|---|---|---|---|---|
80000 | ENSG00000001626 | 117188850 | G | GTC | . | frameshift_variant | ENSP00000389119.1:p.Val426SerfsTer14ENST000004... | frameshift_variant |
80001 | ENSG00000001626 | 117188850 | G | GTGA | . | stop_gained | ENSP00000389119.1:p.Ala425_Val426insTerENST000... | stop_gained |
80002 | ENSG00000001626 | 117188850 | G | T | . | synonymous_variant | ENST00000426809.1:c.1275G>T(p.%3D)ENST00000426... | synonymous_variant |
80003 | ENSG00000001626 | 117188850 | GG | G | . | frameshift_variant | ENSP00000389119.1:p.Val426LeufsTer13ENST000004... | frameshift_variant |
80004 | ENSG00000001626 | 117188850 | GGT | G | . | frameshift_variant | ENSP00000389119.1:p.Val426CysfsTer25ENST000004... | frameshift_variant |
80005 | ENSG00000001626 | 117188850 | GGTT | G | . | inframe_deletion | ENSP00000389119.1:p.Val426delENST00000426809.1... | inframe_variant |
80006 | ENSG00000001626 | 117188851 | G | A | . | missense_variant | ENSP00000389119.1:p.Val426IleENST00000426809.1... | nonsynonymous_variant |
80007 | ENSG00000001626 | 117188851 | G | C | . | missense_variant | ENSP00000389119.1:p.Val426LeuENST00000426809.1... | nonsynonymous_variant |
80008 | ENSG00000001626 | 117188851 | G | GA | . | frameshift_variant | ENSP00000389119.1:p.Val426AspfsTer26ENST000004... | frameshift_variant |
80009 | ENSG00000001626 | 117188851 | G | GAC | . | frameshift_variant | ENSP00000389119.1:p.Val426AspfsTer14ENST000004... | frameshift_variant |
80010 | ENSG00000001626 | 117188851 | G | GAGT | . | inframe_insertion | ENSP00000389119.1:p.Val426delinsGluPheENST0000... | inframe_variant |
80011 | ENSG00000001626 | 117188851 | G | GAT | . | frameshift_variant | ENSP00000389119.1:p.Val426AspfsTer14ENST000004... | frameshift_variant |
80012 | ENSG00000001626 | 117188851 | G | GC | . | frameshift_variant | ENSP00000389119.1:p.Val426AlafsTer26ENST000004... | frameshift_variant |
80013 | ENSG00000001626 | 117188851 | G | GG | . | frameshift_variant | ENSP00000389119.1:p.Val426GlyfsTer26ENST000004... | frameshift_variant |
80014 | ENSG00000001626 | 117188851 | G | GGTA | . | inframe_insertion | ENSP00000389119.1:p.Val426delinsGlyIleENST0000... | inframe_variant |
80015 | ENSG00000001626 | 117188851 | G | GT | . | frameshift_variant | ENSP00000389119.1:p.Ala427CysfsTer25ENST000004... | frameshift_variant |
80016 | ENSG00000001626 | 117188851 | G | T | . | missense_variant | ENSP00000389119.1:p.Val426PheENST00000426809.1... | nonsynonymous_variant |
80017 | ENSG00000001626 | 117188851 | GT | G | . | frameshift_variant | ENSP00000389119.1:p.Ala427LeufsTer12ENST000004... | frameshift_variant |
80018 | ENSG00000001626 | 117188851 | GTT | G | . | frameshift_variant | ENSP00000389119.1:p.Val426GlyfsTer25ENST000004... | frameshift_variant |
80019 | ENSG00000001626 | 117188851 | GTTG | G | . | inframe_deletion | ENSP00000389119.1:p.Val426delENST00000426809.1... | inframe_variant |
80020 | ENSG00000001626 | 117188852 | T | A | . | missense_variant | ENSP00000389119.1:p.Val426AspENST00000426809.1... | nonsynonymous_variant |
80021 | ENSG00000001626 | 117188852 | T | C | . | missense_variant | ENSP00000389119.1:p.Val426AlaENST00000426809.1... | nonsynonymous_variant |
80022 | ENSG00000001626 | 117188852 | T | G | . | missense_variant | ENSP00000389119.1:p.Val426GlyENST00000426809.1... | nonsynonymous_variant |
23 rows × 8 columns
# Load the ANNOVAR annotations from the HDF5 store. pd.read_hdf opens the
# file, reads the one object and closes the file again; it replaces
# pd.get_store, which is deprecated and no longer available in later pandas
# releases.
annovar_subset = pd.read_hdf('classified_variant_store.h5', "cftr_annovar_ensembl_subset")

# For every (gene, position, ref, alt) variant, pick the highest-precedence
# normalized effect among all of its rows.
grouped_annovar_subset = annovar_subset.groupby(["Gene", "POS", "REF", "ALT"])
agg_annovar = grouped_annovar_subset.agg({"normalized_so_annovar": ranked}).reset_index()

# Remove the per-row effect before joining so the merged table has a single
# "normalized_so_annovar" column (the aggregated one) rather than two
# suffixed copies.
del annovar_subset['normalized_so_annovar']

# Attach the remaining ANNOVAR columns to the aggregated effect.
# NOTE(review): unlike the SnpEff and VEP cells, nothing here reduces the
# result to one row per variant; if the source table holds several rows for
# one key, this inner join keeps all of them -- confirm the input is already
# unique per (Gene, POS, REF, ALT).
agg_annovar = pd.merge(agg_annovar, annovar_subset, on=["Gene", "POS", "REF", "ALT"])

# Release the large intermediates.
del grouped_annovar_subset
del annovar_subset
agg_annovar[2000:2050]
Gene | POS | REF | ALT | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|
2000 | ENSG00000001626 | 117119399 | GGT | G | splicing_variant | splicing | NaN |
2001 | ENSG00000001626 | 117119399 | GGTA | G | splicing_variant | splicing | NaN |
2002 | ENSG00000001626 | 117119400 | G | A | splicing_variant | splicing | NaN |
2003 | ENSG00000001626 | 117119400 | G | C | splicing_variant | splicing | NaN |
2004 | ENSG00000001626 | 117119400 | G | GA | splicing_variant | splicing | NaN |
2005 | ENSG00000001626 | 117119400 | G | GAGC | splicing_variant | splicing | NaN |
2006 | ENSG00000001626 | 117119400 | G | GC | splicing_variant | splicing | NaN |
2007 | ENSG00000001626 | 117119400 | G | GG | splicing_variant | splicing | NaN |
2008 | ENSG00000001626 | 117119400 | G | GGCA | splicing_variant | splicing | NaN |
2009 | ENSG00000001626 | 117119400 | G | GT | splicing_variant | splicing | NaN |
2010 | ENSG00000001626 | 117119400 | G | GTA | splicing_variant | splicing | NaN |
2011 | ENSG00000001626 | 117119400 | G | GTG | splicing_variant | splicing | NaN |
2012 | ENSG00000001626 | 117119400 | G | T | splicing_variant | splicing | NaN |
2013 | ENSG00000001626 | 117119400 | GT | G | splicing_variant | splicing | NaN |
2014 | ENSG00000001626 | 117119400 | GTA | G | splicing_variant | splicing | NaN |
2015 | ENSG00000001626 | 117119400 | GTAA | G | splicing_variant | splicing | NaN |
2016 | ENSG00000001626 | 117119401 | T | A | splicing_variant | splicing | NaN |
2017 | ENSG00000001626 | 117119401 | T | C | splicing_variant | splicing | NaN |
2018 | ENSG00000001626 | 117119401 | T | G | splicing_variant | splicing | NaN |
2019 | ENSG00000001626 | 117119401 | T | TA | splicing_variant | splicing | NaN |
2020 | ENSG00000001626 | 117119401 | T | TAT | splicing_variant | splicing | NaN |
2021 | ENSG00000001626 | 117119401 | T | TC | splicing_variant | splicing | NaN |
2022 | ENSG00000001626 | 117119401 | T | TCGT | splicing_variant | splicing | NaN |
2023 | ENSG00000001626 | 117119401 | T | TG | splicing_variant | splicing | NaN |
2024 | ENSG00000001626 | 117119401 | T | TGCT | splicing_variant | splicing | NaN |
2025 | ENSG00000001626 | 117119401 | T | TGT | splicing_variant | splicing | NaN |
2026 | ENSG00000001626 | 117119401 | T | TT | splicing_variant | splicing | NaN |
2027 | ENSG00000001626 | 117119401 | TA | T | intron_variant | intronic | NaN |
2028 | ENSG00000001626 | 117119401 | TAA | T | intron_variant | intronic | NaN |
2029 | ENSG00000001626 | 117119401 | TAAA | T | intron_variant | intronic | NaN |
2030 | ENSG00000001626 | 117119402 | A | AA | intron_variant | intronic | NaN |
2031 | ENSG00000001626 | 117119402 | A | AAG | intron_variant | intronic | NaN |
2032 | ENSG00000001626 | 117119402 | A | AAGT | intron_variant | intronic | NaN |
2033 | ENSG00000001626 | 117119402 | A | AC | intron_variant | intronic | NaN |
2034 | ENSG00000001626 | 117119402 | A | ACAG | intron_variant | intronic | NaN |
2035 | ENSG00000001626 | 117119402 | A | AG | intron_variant | intronic | NaN |
2036 | ENSG00000001626 | 117119402 | A | AGT | intron_variant | intronic | NaN |
2037 | ENSG00000001626 | 117119402 | A | AT | intron_variant | intronic | NaN |
2038 | ENSG00000001626 | 117119402 | A | C | intron_variant | intronic | NaN |
2039 | ENSG00000001626 | 117119402 | A | G | intron_variant | intronic | NaN |
2040 | ENSG00000001626 | 117119402 | A | T | intron_variant | intronic | NaN |
2041 | ENSG00000001626 | 117119402 | AA | A | intron_variant | intronic | NaN |
2042 | ENSG00000001626 | 117119402 | AAA | A | intron_variant | intronic | NaN |
2043 | ENSG00000001626 | 117119402 | AAAT | A | intron_variant | intronic | NaN |
2044 | ENSG00000001626 | 117119403 | A | AA | intron_variant | intronic | NaN |
2045 | ENSG00000001626 | 117119403 | A | AC | intron_variant | intronic | NaN |
2046 | ENSG00000001626 | 117119403 | A | ACAG | intron_variant | intronic | NaN |
2047 | ENSG00000001626 | 117119403 | A | ACT | intron_variant | intronic | NaN |
2048 | ENSG00000001626 | 117119403 | A | ACTG | intron_variant | intronic | NaN |
2049 | ENSG00000001626 | 117119403 | A | AG | intron_variant | intronic | NaN |
50 rows × 7 columns
# Tally, per tool, how many aggregated variants carry each normalized effect.
vc_snpeff = agg_snpeff.groupby(["normalized_so_snpeff"]).size()
vc_vep = agg_vep.groupby(["normalized_so_vep"]).size()
vc_annovar = agg_annovar.groupby(["normalized_so_annovar"]).size()

# Name each Series after its tool; the names become the row labels of vc_df
# and therefore the column labels of the transposed frame that is plotted.
for counts, tool_label in ((vc_snpeff, "SNPeff"), (vc_vep, "VEP"), (vc_annovar, "Annovar")):
    counts.name = tool_label

# One row per tool, one column per effect; transpose so that every effect
# gets a group of horizontal bars, one bar per tool.
vc_df = pd.DataFrame([vc_snpeff, vc_vep, vc_annovar])
effect_counts = vc_df.transpose()
effect_counts.plot(kind="barh", fontsize=13, figsize=(16,8))
<matplotlib.axes.AxesSubplot at 0x11ff2cb90>
# Combine the three per-tool tables into one frame keyed on the variant
# (Gene, POS, REF, ALT). Outer joins keep a variant even when only some of the
# tools annotated it; the columns of the missing tools are then NaN.
# Non-key columns present in both the SnpEff and VEP tables (e.g. "ID") receive
# pandas' default "_x" / "_y" suffixes.
master_df = (
    agg_snpeff
    .merge(agg_vep, how="outer", on=["Gene", "POS", "REF", "ALT"])
    .merge(agg_annovar, how="outer", on=["Gene", "POS", "REF", "ALT"])
)
We just want to find the ratio of rows where all three algorithms report the same effect vs the total number of rows.
Unfortunately, for coding variants (the second calculation) we can't use the total number of rows in the master table as the denominator. Instead, we use the number of unique rows where at least one algorithm reports one of the effects in the list of coding effects.
# Concordance between the three annotators, reported twice:
#   1. over every variant in master_df, and
#   2. over "coding" variants only (terms with a precedence rank below 8).
# Each report prints the number of concordant rows, the denominator and the
# resulting percentage. print(...) with a single argument and dict.items()
# behave the same on Python 2 and Python 3.
so_vep = master_df["normalized_so_vep"]
so_snpeff = master_df["normalized_so_snpeff"]
so_annovar = master_df["normalized_so_annovar"]

# True where all three tools report the same value. This is combined below
# with an isin(...) test on the known terms, so a row only counts as matching
# when the shared value is an actual effect; rows where a tool has no
# annotation (NaN) are never counted.
all_agree = (so_vep == so_snpeff) & (so_snpeff == so_annovar)

#Number of matching
# Rows are counted through the non-null entries of "POS", which is one of the
# merge keys of master_df.
num_matching = master_df["POS"][all_agree & so_vep.isin(list(precedence_dict))].count()
num_total = master_df["POS"].count()
print(num_matching)
print(num_total)
print("Percent matching: " + str(100.0*num_matching/num_total))

#list of coding level effects
effects = [eff for eff, priority in precedence_dict.items() if priority < 8]

# Denominator for the coding comparison: rows where at least one tool reports
# a coding-level effect (the whole table would dilute the ratio with
# non-coding variants).
any_coding = so_vep.isin(effects) | so_snpeff.isin(effects) | so_annovar.isin(effects)

num_matching = master_df["POS"][all_agree & so_vep.isin(effects)].count()
num_total = master_df["POS"][any_coding].count()
print(num_matching)
print(num_total)
print("Percent matching: " + str(100.0*num_matching/num_total))
140859 287071 Percent matching: 49.0676522533 61688 66418 Percent matching: 92.8784365684
# Draw one unweighted three-set Venn diagram per distinct value of the SnpEff
# effect column. Each set holds the master_df row labels on which the given
# tool reports that effect, so the overlaps show where the tools agree.
tool_columns = ("normalized_so_vep", "normalized_so_snpeff", "normalized_so_annovar")
for effect in master_df["normalized_so_snpeff"].unique():
    # Same order as set_labels below: VEP, SNPeff, Annovar.
    member_sets = []
    for column in tool_columns:
        hits = master_df[master_df[column] == effect]
        member_sets.append(set(hits.index.values))

    fig = plt.figure(figsize=(10,10), dpi=300)
    fig.suptitle(effect, fontsize=14, fontweight='bold')
    v = venn3_unweighted(member_sets, set_labels=("VEP", "SNPeff", "Annovar"))
    plt.plot(fontsize=24)
# Build an HTML report of the variants on which SnpEff and VEP agree but
# ANNOVAR does not: for every distinct SnpEff effect, a heading, up to five
# example rows and the total number of such rows.
# The pieces are collected in a list and joined once at the end instead of
# growing one string with repeated "+=".
fragments = ['<h1>Other algo\'s agree, but...</h1>']
for effect in master_df["normalized_so_snpeff"].unique():
    fragments.append("<h2> Annovar doesn't match for <em>" + str(effect) + "</em></h2>")
    # "!=" is also True where ANNOVAR has no annotation at all (NaN), so
    # variants missing from ANNOVAR are reported as mismatches too.
    query = master_df.loc[(master_df["normalized_so_annovar"]!=effect) &
                          (master_df["normalized_so_snpeff"]==effect) &
                          (master_df["normalized_so_vep"]==effect)]
    # Row count of the selection. len() avoids query.count()[0], which relied
    # on positional integer indexing of a label-indexed Series (deprecated in
    # pandas); the first column, "Gene", is a merge key of master_df, so the
    # number of rows is the same value.
    num_rows = len(query)
    if num_rows > 0:
        fragments.append(query.head(5).to_html())
    fragments.append("<p>" + str(num_rows) + " rows</p>")
sampletables = "".join(fragments)
HTML(sampletables)
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1403 | ENSG00000001626 | 117105737 | C | A | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1404 | ENSG00000001626 | 117105737 | C | CA | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1405 | ENSG00000001626 | 117105737 | C | CAG | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1406 | ENSG00000001626 | 117105737 | C | CC | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1407 | ENSG00000001626 | 117105737 | C | CCTG | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN |
14148 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2814 | ENSG00000001626 | 117105837 | TT | T | . | EXON | CFTR | ENST00000546407 | ignored | . | non_coding_exon_variant | ENST00000546407.1:n.1delT | ignored | NaN | NaN | NaN | |
2815 | ENSG00000001626 | 117105837 | TTG | T | . | EXON | CFTR | ENST00000546407 | ignored | . | non_coding_exon_variant | ENST00000546407.1:n.1_2delTG | ignored | NaN | NaN | NaN | |
2816 | ENSG00000001626 | 117105837 | TTGA | T | . | EXON | CFTR | ENST00000546407 | ignored | . | non_coding_exon_variant | ENST00000546407.1:n.1_3delTGA | ignored | NaN | NaN | NaN | |
2817 | ENSG00000001626 | 117105838 | T | A | . | EXON | CFTR | ENST00000546407 | ignored | . | non_coding_exon_variant | ENST00000546407.1:n.1T>A | ignored | NaN | NaN | NaN | |
2818 | ENSG00000001626 | 117105838 | T | C | . | EXON | CFTR | ENST00000546407 | ignored | . | non_coding_exon_variant | ENST00000546407.1:n.1T>C | ignored | NaN | NaN | NaN |
661 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3460 | ENSG00000001626 | 117105883 | AAGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.47_48+1delAGG | splicing_variant | NaN | NaN | NaN | |
3473 | ENSG00000001626 | 117105884 | AGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48_48+1delGG | splicing_variant | NaN | NaN | NaN | |
3474 | ENSG00000001626 | 117105884 | AGGT | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48_48+2delGGT | splicing_variant | NaN | NaN | NaN | |
3486 | ENSG00000001626 | 117105885 | GG | G | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48+1delG | splicing_variant | NaN | NaN | NaN | |
3487 | ENSG00000001626 | 117105885 | GGT | G | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48+1_48+2delGT | splicing_variant | NaN | NaN | NaN |
486 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3506 | ENSG00000001626 | 117105887 | T | TA | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+A | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insA | intron_variant | NaN | NaN | NaN |
3507 | ENSG00000001626 | 117105887 | T | TAC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+AC | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insAC | intron_variant | NaN | NaN | NaN |
3508 | ENSG00000001626 | 117105887 | T | TATC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+ATC | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insATC | intron_variant | NaN | NaN | NaN |
3509 | ENSG00000001626 | 117105887 | T | TC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+C | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insC | intron_variant | NaN | NaN | NaN |
3510 | ENSG00000001626 | 117105887 | T | TCTA | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+CTA | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insCTA | intron_variant | NaN | NaN | NaN |
66208 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
12941 | ENSG00000001626 | 117119515 | G | GA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insA | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12942 | ENSG00000001626 | 117119515 | G | GAT | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insAT | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12943 | ENSG00000001626 | 117119515 | G | GC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insC | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12944 | ENSG00000001626 | 117119515 | G | GCAT | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insCAT | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12945 | ENSG00000001626 | 117119515 | G | GCGA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insCGA | 5_prime_UTR_variant | splicing_variant | splicing | NaN |
8 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
24393 | ENSG00000001626 | 117144306 | G | GA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+A | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->A,ENST0000000308... |
24394 | ENSG00000001626 | 117144306 | G | GAG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+AG | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->AG,ENST000000030... |
24395 | ENSG00000001626 | 117144306 | G | GAT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+AT | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->AT,ENST000000030... |
24396 | ENSG00000001626 | 117144306 | G | GC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+C | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->C,ENST0000000308... |
24399 | ENSG00000001626 | 117144306 | G | GG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+G | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->G,ENST0000000308... |
1337 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20862 | ENSG00000001626 | 117120148 | CATG | C | . | CODON_DELETION | CFTR | ENST00000454343 | p.Met1X/c.1*>-ATG | inframe_variant | . | inframe_deletion | ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... | inframe_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... |
24397 | ENSG00000001626 | 117144306 | G | GCGA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CGA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CGA,ENST00000003... |
24398 | ENSG00000001626 | 117144306 | G | GCTA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CTA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CTA,ENST00000003... |
28750 | ENSG00000001626 | 117149087 | G | GCGA | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Arg55X/c.165*>+CGA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Arg55delinsSerGluENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon3:c.165-1->CGA,ENST0000000... |
28754 | ENSG00000001626 | 117149087 | G | GTAC | . | CODON_CHANGE_PLUS_CODON_INSERTION | CFTR | ENST00000454343 | p.Arg55X/c.165*>+TAC | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Arg55delinsSerThrENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon3:c.165-1->TAC,ENST0000000... |
422 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136292 | ENSG00000001626 | 117267828 | A | G | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Gly/c.544A>G | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183GlyENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136297 | ENSG00000001626 | 117267829 | G | A | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Lys/c.545G>A | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183LysENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136298 | ENSG00000001626 | 117267829 | G | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Thr/c.545G>C | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183ThrENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136307 | ENSG00000001626 | 117267829 | G | T | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Ile/c.545G>T | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183IleENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136319 | ENSG00000001626 | 117267830 | A | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Ser/c.546A>C | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183SerENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
273 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
21380 | ENSG00000001626 | 117120185 | TCCA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X13*/c.38*>-CCA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon1:c.38_40d... |
24810 | ENSG00000001626 | 117144335 | TACA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.83*>-ACA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.83_85d... |
24824 | ENSG00000001626 | 117144336 | ACAG | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.84*>-CAG | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.84_86d... |
25230 | ENSG00000001626 | 117144365 | TACC | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X38*/c.113*>-ACC | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr38_Gln39delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.113_11... |
25244 | ENSG00000001626 | 117144366 | ACCA | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.X38*/c.114*>-CCA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr38_Gln39delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.114_11... |
145 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136269 | ENSG00000001626 | 117267827 | G | A | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>A | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>A(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136270 | ENSG00000001626 | 117267827 | G | C | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>C | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>C(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136279 | ENSG00000001626 | 117267827 | G | T | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>T | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>T(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136291 | ENSG00000001626 | 117267828 | A | C | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Arg/c.544A>C | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.546A>C(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136320 | ENSG00000001626 | 117267830 | A | G | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Arg/c.546A>G | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.548A>G(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
85 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
25976 | ENSG00000001626 | 117144419 | T | TA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->A,ENST000000030... | ||
25977 | ENSG00000001626 | 117144419 | T | TATG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->ATG,ENST0000000... | ||
25978 | ENSG00000001626 | 117144419 | T | TC | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->C,ENST000000030... | ||
25979 | ENSG00000001626 | 117144419 | T | TCGA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->CGA,ENST0000000... | ||
25980 | ENSG00000001626 | 117144419 | T | TG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->G,ENST000000030... |
41993 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136604 | ENSG00000001626 | 117267850 | GTTA | G | . | STOP_LOST | CFTR | ENST00000468795 | p.X189Trpext*?/c.567*>-TTA | stop_lost | . | stop_lost | ENSP00000419254.1:p.CysTer190TrpENST0000046879... | stop_lost | ignored | unknown | UNKNOWN |
136618 | ENSG00000001626 | 117267851 | TTAG | T | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>-TAG | stop_lost | . | stop_lost | ENSP00000419254.1:p.Ter191delextTer5ENST000004... | stop_lost | ignored | unknown | UNKNOWN |
136619 | ENSG00000001626 | 117267852 | T | A | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Lysext*?/c.568T>A | stop_lost | . | stop_lost | ENSP00000419254.1:p.Ter191LysextTer6ENST000004... | stop_lost | ignored | unknown | UNKNOWN |
136620 | ENSG00000001626 | 117267852 | T | C | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Glnext*?/c.568T>C | stop_lost | . | stop_lost | ENSP00000419254.1:p.Ter191GlnextTer6ENST000004... | stop_lost | ignored | unknown | UNKNOWN |
136621 | ENSG00000001626 | 117267852 | T | G | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Gluext*?/c.568T>G | stop_lost | . | stop_lost | ENSP00000419254.1:p.Ter191GluextTer6ENST000004... | stop_lost | ignored | unknown | UNKNOWN |
42 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136649 | ENSG00000001626 | 117267854 | G | GA | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN | |
136650 | ENSG00000001626 | 117267854 | G | GACG | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insACG | 3_prime_UTR_variant | ignored | unknown | UNKNOWN | |
136651 | ENSG00000001626 | 117267854 | G | GC | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insC | 3_prime_UTR_variant | ignored | unknown | UNKNOWN | |
136652 | ENSG00000001626 | 117267854 | G | GCG | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insCG | 3_prime_UTR_variant | ignored | unknown | UNKNOWN | |
136653 | ENSG00000001626 | 117267854 | G | GG | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.572dupG | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
1019 rows
0 rows
# Build an HTML report with one section per distinct value of the
# "normalized_so_snpeff" column. Each section shows rows where ANNOVAR's
# normalized effect differs from that value while SnpEff or VEP reports it.
# `master_df` is defined in an earlier cell; this cell reads the columns
# "normalized_so_snpeff", "normalized_so_vep" and "normalized_so_annovar".
report_parts = ["<h1>At least 1 column doesn't match</h1>"]
for effect in master_df["normalized_so_snpeff"].unique():
    report_parts.append("<h2> Annovar doesn't match for <em>" + str(effect) + "</em></h2>")
    annovar_differs = master_df["normalized_so_annovar"] != effect
    snpeff_or_vep_matches = ((master_df["normalized_so_snpeff"] == effect) |
                             (master_df["normalized_so_vep"] == effect))
    query = master_df.loc[annovar_differs & snpeff_or_vep_matches]
    # Use len() for the row count: DataFrame.count() only tallies non-null
    # cells per column, and taking [0] of it relied on positional indexing
    # into a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        # Only a five-row sample is rendered; the full count follows below.
        report_parts.append(query.head(5).to_html())
    report_parts.append("<p>" + str(num_rows) + " rows</p>")
sampletables = "".join(report_parts)
# Must stay the last expression so the notebook renders the report.
HTML(sampletables)
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 117105737 | C | A | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
1 | 117105737 | C | CA | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
2 | 117105737 | C | CAG | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
3 | 117105737 | C | CC | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
4 | 117105737 | C | CCTG | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1403 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1403 | ENSG00000001626 | 117105737 | C | A | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1404 | ENSG00000001626 | 117105737 | C | CA | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1405 | ENSG00000001626 | 117105737 | C | CAG | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1406 | ENSG00000001626 | 117105737 | C | CC | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
1407 | ENSG00000001626 | 117105737 | C | CCTG | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN |
14170 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2788 | ENSG00000001626 | 117105835 | TTTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2801 | ENSG00000001626 | 117105836 | TTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2802 | ENSG00000001626 | 117105836 | TTTG | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2806 | ENSG00000001626 | 117105837 | T | TA | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2807 | ENSG00000001626 | 117105837 | T | TAC | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN |
680 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3460 | ENSG00000001626 | 117105883 | AAGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.47_48+1delAGG | splicing_variant | NaN | NaN | NaN | |
3473 | ENSG00000001626 | 117105884 | AGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48_48+1delGG | splicing_variant | NaN | NaN | NaN | |
3474 | ENSG00000001626 | 117105884 | AGGT | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_donor_variant | ENST00000546407.1:n.48_48+2delGGT | splicing_variant | NaN | NaN | NaN | |
3477 | ENSG00000001626 | 117105885 | G | GA | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insA | ignored | NaN | NaN | NaN | |
3478 | ENSG00000001626 | 117105885 | G | GC | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insC | ignored | NaN | NaN | NaN |
1038 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3506 | ENSG00000001626 | 117105887 | T | TA | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+A | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insA | intron_variant | NaN | NaN | NaN |
3507 | ENSG00000001626 | 117105887 | T | TAC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+AC | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insAC | intron_variant | NaN | NaN | NaN |
3508 | ENSG00000001626 | 117105887 | T | TATC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+ATC | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insATC | intron_variant | NaN | NaN | NaN |
3509 | ENSG00000001626 | 117105887 | T | TC | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+C | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insC | intron_variant | NaN | NaN | NaN |
3510 | ENSG00000001626 | 117105887 | T | TCTA | . | INTRON | CFTR | ENST00000546407 | n.48+3*>+CTA | intron_variant | . | intron_variant | ENST00000546407.1:n.48+2_48+3insCTA | intron_variant | NaN | NaN | NaN |
66288 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10730 | ENSG00000001626 | 117119357 | T | TA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insA | intron_variant | upstream_gene_variant | upstream | NaN | |
10731 | ENSG00000001626 | 117119357 | T | TAG | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insAG | intron_variant | upstream_gene_variant | upstream | NaN | |
10732 | ENSG00000001626 | 117119357 | T | TC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insC | intron_variant | upstream_gene_variant | upstream | NaN | |
10733 | ENSG00000001626 | 117119357 | T | TG | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insG | intron_variant | upstream_gene_variant | upstream | NaN | |
10734 | ENSG00000001626 | 117119357 | T | TGC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insGC | intron_variant | upstream_gene_variant | upstream | NaN |
27 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20850 | ENSG00000001626 | 117120148 | C | CA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+A | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insA | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20852 | ENSG00000001626 | 117120148 | C | CC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+C | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1dupC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20853 | ENSG00000001626 | 117120148 | C | CG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+G | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insG | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20854 | ENSG00000001626 | 117120148 | C | CGC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+GC | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20855 | ENSG00000001626 | 117120148 | C | CT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+T | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
1823 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20851 | ENSG00000001626 | 117120148 | C | CACT | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+ACT | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insACT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20857 | ENSG00000001626 | 117120148 | C | CTGC | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+TGC | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20862 | ENSG00000001626 | 117120148 | CATG | C | . | CODON_DELETION | CFTR | ENST00000454343 | p.Met1X/c.1*>-ATG | inframe_variant | . | inframe_deletion | ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... | inframe_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... |
24397 | ENSG00000001626 | 117144306 | G | GCGA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CGA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CGA,ENST00000003... |
24398 | ENSG00000001626 | 117144306 | G | GCTA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CTA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CTA,ENST00000003... |
424 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136292 | ENSG00000001626 | 117267828 | A | G | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Gly/c.544A>G | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183GlyENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136297 | ENSG00000001626 | 117267829 | G | A | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Lys/c.545G>A | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183LysENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136298 | ENSG00000001626 | 117267829 | G | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Thr/c.545G>C | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183ThrENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136307 | ENSG00000001626 | 117267829 | G | T | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Ile/c.545G>T | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183IleENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
136319 | ENSG00000001626 | 117267830 | A | C | . | NON_SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Ser/c.546A>C | nonsynonymous_variant | . | missense_variant | ENSP00000419254.1:p.Arg183SerENST00000468795.1... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
277 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20939 | ENSG00000001626 | 117120154 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
20940 | ENSG00000001626 | 117120154 | G | GTA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+TA | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
21109 | ENSG00000001626 | 117120166 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Glu7*/c.19*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.18_19i... |
21148 | ENSG00000001626 | 117120169 | A | AT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Lys8*/c.22*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.21_22i... |
21149 | ENSG00000001626 | 117120169 | A | ATA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Lys8*/c.22*>+TA | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.21_22i... |
797 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136269 | ENSG00000001626 | 117267827 | G | A | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>A | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>A(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136270 | ENSG00000001626 | 117267827 | G | C | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>C | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>C(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136279 | ENSG00000001626 | 117267827 | G | T | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Val181Val/c.543G>T | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.545G>T(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136291 | ENSG00000001626 | 117267828 | A | C | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Arg/c.544A>C | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.546A>C(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
136320 | ENSG00000001626 | 117267830 | A | G | . | SYNONYMOUS_CODING | CFTR | ENST00000468795 | p.Arg182Arg/c.546A>G | synonymous_variant | . | synonymous_variant | ENST00000468795.1:c.548A>G(p.%3D)ENST000004687... | synonymous_variant | ignored | unknown | UNKNOWN |
88 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
25976 | ENSG00000001626 | 117144419 | T | TA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->A,ENST000000030... | ||
25977 | ENSG00000001626 | 117144419 | T | TATG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->ATG,ENST0000000... | ||
25978 | ENSG00000001626 | 117144419 | T | TC | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->C,ENST000000030... | ||
25979 | ENSG00000001626 | 117144419 | T | TCGA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->CGA,ENST0000000... | ||
25980 | ENSG00000001626 | 117144419 | T | TG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->G,ENST000000030... |
42041 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136603 | ENSG00000001626 | 117267850 | GTT | G | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X189X/c.567*>-TT | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Cys190TerENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
136604 | ENSG00000001626 | 117267850 | GTTA | G | . | STOP_LOST | CFTR | ENST00000468795 | p.X189Trpext*?/c.567*>-TTA | stop_lost | . | stop_lost | ENSP00000419254.1:p.CysTer190TrpENST0000046879... | stop_lost | ignored | unknown | UNKNOWN |
136608 | ENSG00000001626 | 117267851 | T | TA | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+A | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191IlefsTer6ENST0000046... | frameshift_variant | ignored | unknown | UNKNOWN |
136609 | ENSG00000001626 | 117267851 | T | TC | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+C | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191LeufsTer6ENST0000046... | frameshift_variant | ignored | unknown | UNKNOWN |
136610 | ENSG00000001626 | 117267851 | T | TCT | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+CT | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191LeufsTer11ENST000004... | frameshift_variant | ignored | unknown | UNKNOWN |
98 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136632 | ENSG00000001626 | 117267852 | TAGA | T | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.569*>-AGA | stop_lost | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_571delAGA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
136645 | ENSG00000001626 | 117267853 | AGA | A | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X190X/c.570*>-GA | frameshift_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572delGA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
136646 | ENSG00000001626 | 117267853 | AGAC | A | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.570*>-GAC | stop_lost | . | 3_prime_UTR_variant | ENST00000468795.1:c.*2_572delGAC | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
136649 | ENSG00000001626 | 117267854 | G | GA | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN | |
136650 | ENSG00000001626 | 117267854 | G | GACG | . | UTR_3_PRIME | CFTR | ENST00000468795 | 3_prime_UTR_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572insACG | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
1036 rows
0 rows
# Build an HTML report with one section per distinct value of the
# "normalized_so_snpeff" column. Each section shows rows where ANNOVAR and
# VEP both report that value but SnpEff reports something else.
# `master_df` is defined in an earlier cell; this cell reads the columns
# "normalized_so_snpeff", "normalized_so_vep" and "normalized_so_annovar".
report_parts = []
for effect in master_df["normalized_so_snpeff"].unique():
    report_parts.append("<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>")
    annovar_matches = master_df["normalized_so_annovar"] == effect
    snpeff_differs = master_df["normalized_so_snpeff"] != effect
    vep_matches = master_df["normalized_so_vep"] == effect
    query = master_df.loc[annovar_matches & snpeff_differs & vep_matches]
    # Use len() for the row count: DataFrame.count() only tallies non-null
    # cells per column, and taking [0] of it relied on positional indexing
    # into a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        # Only the last five rows are rendered; the full count follows below.
        report_parts.append(query.tail(5).to_html())
    report_parts.append("<p>" + str(num_rows) + " rows</p>")
sampletables = "".join(report_parts)
# Must stay the last expression so the notebook renders the report.
HTML(sampletables)
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
259692 | ENSG00000232661 | 117204728 | GAAC | G | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN | ||
259705 | ENSG00000232661 | 117204729 | AAC | A | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN | ||
259706 | ENSG00000232661 | 117204729 | AACT | A | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN |
3 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
187471 | ENSG00000001626 | 117355809 | C | CCG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insCG | intron_variant | intron_variant | intronic | NaN | |
187472 | ENSG00000001626 | 117355809 | C | CG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insG | intron_variant | intron_variant | intronic | NaN | |
187473 | ENSG00000001626 | 117355809 | C | CGCA | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insGCA | intron_variant | intron_variant | intronic | NaN | |
187474 | ENSG00000001626 | 117355809 | C | CGT | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insGT | intron_variant | intron_variant | intronic | NaN | |
187475 | ENSG00000001626 | 117355809 | C | CT | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insT | intron_variant | intron_variant | intronic | NaN |
208 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20853 | ENSG00000001626 | 117120148 | C | CG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+G | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insG | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20854 | ENSG00000001626 | 117120148 | C | CGC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+GC | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20855 | ENSG00000001626 | 117120148 | C | CT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+T | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20856 | ENSG00000001626 | 117120148 | C | CTG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+TG | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTG | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20857 | ENSG00000001626 | 117120148 | C | CTGC | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+TGC | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
24 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160354 | ENSG00000001626 | 117307159 | T | TC | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+C | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160356 | ENSG00000001626 | 117307159 | T | TG | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+G | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420ValfsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160357 | ENSG00000001626 | 117307159 | T | TGA | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+GA | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420AspfsTer8ENST000004... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160358 | ENSG00000001626 | 117307159 | T | TT | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+T | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257d... |
160359 | ENSG00000001626 | 117307159 | T | TTC | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+TC | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420SerfsTer8ENST000004... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
776 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
150498 | ENSG00000001626 | 117304914 | T | TCGA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Val1349_Thr1350insGluENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon24:c.4046_... | |
154782 | ENSG00000001626 | 117305618 | G | GCGT | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Leu1384_Val1385insArgENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon25:c.4152_... | |
154785 | ENSG00000001626 | 117305618 | G | GGTA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Leu1384_Val1385insValENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon25:c.4152_... | |
159858 | ENSG00000001626 | 117307123 | AGAG | A | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1439X/c.4315*>-GAG | frameshift_variant | . | inframe_deletion | ENSP00000403677.1:p.Glu1408delENST00000454343.... | inframe_variant | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4222_... |
159872 | ENSG00000001626 | 117307124 | GAGG | G | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1439X/c.4316*>-AGG | frameshift_variant | . | inframe_deletion | ENSP00000403677.1:p.Glu1409delENST00000454343.... | inframe_variant | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4223_... |
53 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
25948 | ENSG00000001626 | 117144417 | G | GATG | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Arg55_Glu56insTerENST00000... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.164_16... | |
49816 | ENSG00000001626 | 117176727 | A | AATG | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Gln260_Thr261insTerENST000... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000426809:exon6:c.779_78... | |
150499 | ENSG00000001626 | 117304914 | T | TCTA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Val1349_Thr1350insTerENST0... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000426809:exon24:c.4046_... |
3 rows
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160360 | ENSG00000001626 | 117307159 | TT | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1420X/c.4258*>-T | frameshift_variant | . | stop_lost | ENSP00000403677.1:p.Ter1420ArgENST00000454343.... | stop_lost | stop_lost | stoploss SNV | ENSG00000001626:ENST00000454343:exon26:c.4258d... |
1 rows
0 rows
0 rows
# Build an HTML report titled "Other algo's agree, but...": one section per
# effect label that snpEff produced, listing the variants where annovar and
# VEP both assign that effect but snpEff assigns something else.
report_parts = ["<h1>Other algo's agree, but...</h1>"]
for effect in master_df["normalized_so_snpeff"].unique():
    report_parts.append("<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>")
    query = master_df.loc[(master_df["normalized_so_annovar"] == effect) &
                          (master_df["normalized_so_snpeff"] != effect) &
                          (master_df["normalized_so_vep"] == effect)]
    # len() is the actual number of matching rows. The previous
    # query.count()[0] counted only non-null values of the first column and
    # relied on positional integer indexing of a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        # Show only the last five matches as a sample; the total follows.
        report_parts.append(query.tail(5).to_html())
    report_parts.append("<p>" + str(num_rows) + " rows</p>")
# Join once instead of growing the string inside the loop.
sampletables = "".join(report_parts)
# Last expression of the cell: the notebook renders the assembled HTML.
HTML(sampletables)
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
259692 | ENSG00000232661 | 117204728 | GAAC | G | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN | ||
259705 | ENSG00000232661 | 117204729 | AAC | A | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN | ||
259706 | ENSG00000232661 | 117204729 | AACT | A | . | UPSTREAM | AC000111.3 | ENST00000441019 | upstream_gene_variant | . | non_coding_exon_variant | ignored | ignored | ncRNA_exonic | NaN |
3 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
187471 | ENSG00000001626 | 117355809 | C | CCG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insCG | intron_variant | intron_variant | intronic | NaN | |
187472 | ENSG00000001626 | 117355809 | C | CG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insG | intron_variant | intron_variant | intronic | NaN | |
187473 | ENSG00000001626 | 117355809 | C | CGCA | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insGCA | intron_variant | intron_variant | intronic | NaN | |
187474 | ENSG00000001626 | 117355809 | C | CGT | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insGT | intron_variant | intron_variant | intronic | NaN | |
187475 | ENSG00000001626 | 117355809 | C | CT | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000600166 | splicing_variant | . | intron_variant | ENST00000610149.1:n.450-3_450-2insT | intron_variant | intron_variant | intronic | NaN |
208 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20853 | ENSG00000001626 | 117120148 | C | CG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+G | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insG | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20854 | ENSG00000001626 | 117120148 | C | CGC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+GC | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20855 | ENSG00000001626 | 117120148 | C | CT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+T | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20856 | ENSG00000001626 | 117120148 | C | CTG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+TG | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTG | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20857 | ENSG00000001626 | 117120148 | C | CTGC | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+TGC | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
24 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160354 | ENSG00000001626 | 117307159 | T | TC | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+C | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160356 | ENSG00000001626 | 117307159 | T | TG | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+G | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420ValfsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160357 | ENSG00000001626 | 117307159 | T | TGA | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+GA | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420AspfsTer8ENST000004... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160358 | ENSG00000001626 | 117307159 | T | TT | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+T | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257d... |
160359 | ENSG00000001626 | 117307159 | T | TTC | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4258*>+TC | stop_lost | . | frameshift_variant | ENSP00000403677.1:p.Ter1420SerfsTer8ENST000004... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
776 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
150498 | ENSG00000001626 | 117304914 | T | TCGA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Val1349_Thr1350insGluENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon24:c.4046_... | |
154782 | ENSG00000001626 | 117305618 | G | GCGT | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Leu1384_Val1385insArgENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon25:c.4152_... | |
154785 | ENSG00000001626 | 117305618 | G | GGTA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Leu1384_Val1385insValENST0... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000426809:exon25:c.4152_... | |
159858 | ENSG00000001626 | 117307123 | AGAG | A | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1439X/c.4315*>-GAG | frameshift_variant | . | inframe_deletion | ENSP00000403677.1:p.Glu1408delENST00000454343.... | inframe_variant | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4222_... |
159872 | ENSG00000001626 | 117307124 | GAGG | G | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1439X/c.4316*>-AGG | frameshift_variant | . | inframe_deletion | ENSP00000403677.1:p.Glu1409delENST00000454343.... | inframe_variant | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4223_... |
53 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
25948 | ENSG00000001626 | 117144417 | G | GATG | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Arg55_Glu56insTerENST00000... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.164_16... | |
49816 | ENSG00000001626 | 117176727 | A | AATG | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Gln260_Thr261insTerENST000... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000426809:exon6:c.779_78... | |
150499 | ENSG00000001626 | 117304914 | T | TCTA | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | stop_gained | ENSP00000389119.1:p.Val1349_Thr1350insTerENST0... | stop_gained | stop_gained | stopgain SNV | ENSG00000001626:ENST00000426809:exon24:c.4046_... |
3 rows
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160360 | ENSG00000001626 | 117307159 | TT | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1420X/c.4258*>-T | frameshift_variant | . | stop_lost | ENSP00000403677.1:p.Ter1420ArgENST00000454343.... | stop_lost | stop_lost | stoploss SNV | ENSG00000001626:ENST00000454343:exon26:c.4258d... |
1 rows
0 rows
0 rows
# Build an HTML report titled "At least 1 column doesn't match": one section
# per effect label that snpEff produced, listing the variants where snpEff
# assigns a different effect while annovar OR VEP assigns that one.
report_parts = ["<h1>At least 1 column doesn't match</h1>"]
for effect in master_df["normalized_so_snpeff"].unique():
    report_parts.append("<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>")
    # The original chained assignment (query = num_rows = ...) briefly bound
    # num_rows to the DataFrame; only query should hold the selection.
    query = master_df.loc[(master_df["normalized_so_snpeff"] != effect) &
                          ((master_df["normalized_so_annovar"] == effect) |
                           (master_df["normalized_so_vep"] == effect))]
    # len() is the actual number of matching rows. The previous
    # query.count()[0] counted only non-null values of the first column and
    # relied on positional integer indexing of a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        # Show only the first five matches as a sample; the total follows.
        report_parts.append(query.head(5).to_html())
    report_parts.append("<p>" + str(num_rows) + " rows</p>")
# Join once instead of growing the string inside the loop.
sampletables = "".join(report_parts)
# Last expression of the cell: the notebook renders the assembled HTML.
HTML(sampletables)
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2806 | ENSG00000001626 | 117105837 | T | TA | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2807 | ENSG00000001626 | 117105837 | T | TAC | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2808 | ENSG00000001626 | 117105837 | T | TC | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2809 | ENSG00000001626 | 117105837 | T | TG | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2810 | ENSG00000001626 | 117105837 | T | TGAT | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN |
1424 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2788 | ENSG00000001626 | 117105835 | TTTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2801 | ENSG00000001626 | 117105836 | TTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2802 | ENSG00000001626 | 117105836 | TTTG | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
3477 | ENSG00000001626 | 117105885 | G | GA | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insA | ignored | NaN | NaN | NaN | |
3478 | ENSG00000001626 | 117105885 | G | GC | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insC | ignored | NaN | NaN | NaN |
17901 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11346 | ENSG00000001626 | 117119401 | T | TA | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+A | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insA | intron_variant | splicing_variant | splicing | NaN |
11347 | ENSG00000001626 | 117119401 | T | TAT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+AT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insAT | intron_variant | splicing_variant | splicing | NaN |
11348 | ENSG00000001626 | 117119401 | T | TC | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+C | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insC | intron_variant | splicing_variant | splicing | NaN |
11349 | ENSG00000001626 | 117119401 | T | TCGT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+CGT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insCGT | intron_variant | splicing_variant | splicing | NaN |
11350 | ENSG00000001626 | 117119401 | T | TG | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+G | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insG | intron_variant | splicing_variant | splicing | NaN |
489 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
6250 | ENSG00000001626 | 117115742 | T | TA | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000546407 | splicing_variant | . | intron_variant | ENST00000546407.1:n.49-3_49-2insA | intron_variant | NaN | NaN | NaN | |
6251 | ENSG00000001626 | 117115742 | T | TC | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000546407 | splicing_variant | . | intron_variant | ENST00000546407.1:n.49-3_49-2insC | intron_variant | NaN | NaN | NaN | |
6252 | ENSG00000001626 | 117115742 | T | TCA | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000546407 | splicing_variant | . | intron_variant | ENST00000546407.1:n.49-3_49-2insCA | intron_variant | NaN | NaN | NaN | |
6253 | ENSG00000001626 | 117115742 | T | TCAG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000546407 | splicing_variant | . | intron_variant | ENST00000546407.1:n.49-3_49-2insCAG | intron_variant | NaN | NaN | NaN | |
6254 | ENSG00000001626 | 117115742 | T | TCAT | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000546407 | splicing_variant | . | intron_variant | ENST00000546407.1:n.49-3_49-2insCAT | intron_variant | NaN | NaN | NaN |
18177 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11300 | ENSG00000001626 | 117119397 | AAGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000446805 | splicing_variant | . | splice_donor_variant | ENST00000446805.1:c.-425_-424+1delAGG | splicing_variant | 5_prime_UTR_variant | UTR5 | NaN | |
11313 | ENSG00000001626 | 117119398 | AGG | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000446805 | splicing_variant | . | splice_donor_variant | ENST00000446805.1:c.-424_-424+1delGG | splicing_variant | 5_prime_UTR_variant | UTR5 | NaN | |
11314 | ENSG00000001626 | 117119398 | AGGT | A | . | SPLICE_SITE_DONOR | CFTR | ENST00000446805 | splicing_variant | . | splice_donor_variant | ENST00000446805.1:c.-424_-424+2delGGT | splicing_variant | 5_prime_UTR_variant | UTR5 | NaN | |
11317 | ENSG00000001626 | 117119399 | G | GA | . | SPLICE_SITE_DONOR | CFTR | ENST00000446805 | splicing_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-424_-424+1insA | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN | |
11318 | ENSG00000001626 | 117119399 | G | GAT | . | SPLICE_SITE_DONOR | CFTR | ENST00000446805 | splicing_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-424_-424+1insAT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
36 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20862 | ENSG00000001626 | 117120148 | CATG | C | . | CODON_DELETION | CFTR | ENST00000454343 | p.Met1X/c.1*>-ATG | inframe_variant | . | inframe_deletion | ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... | inframe_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... |
20939 | ENSG00000001626 | 117120154 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
20940 | ENSG00000001626 | 117120154 | G | GTA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+TA | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
21109 | ENSG00000001626 | 117120166 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Glu7*/c.19*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.18_19i... |
21148 | ENSG00000001626 | 117120169 | A | AT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Lys8*/c.22*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.21_22i... |
1393 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
21380 | ENSG00000001626 | 117120185 | TCCA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X13*/c.38*>-CCA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon1:c.38_40d... |
21594 | ENSG00000001626 | 117120201 | G | GACT | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18delinsArgLeuENST00000... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000003084:exon1:c.53_54i... | |
21597 | ENSG00000001626 | 117120201 | G | GCAG | . | SPLICE_SITE_DONOR | CFTR | ENST00000454343 | splicing_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18dupENST00000426809.1:... | inframe_variant | inframe_variant | nonframeshift insertion | ENSG00000001626:ENST00000003084:exon1:c.53_54i... | |
24810 | ENSG00000001626 | 117144335 | TACA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.83*>-ACA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.83_85d... |
24824 | ENSG00000001626 | 117144336 | ACAG | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.84*>-CAG | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.84_86d... |
175 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136647 | ENSG00000001626 | 117267854 | G | A | . | SYNONYMOUS_STOP | CFTR | ENST00000468795 | p.*190*/c.570G>A | synonymous_variant | . | stop_retained_variant | ENST00000468795.1:c.572G>A(p.%3D)ENST000004687... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
160391 | ENSG00000001626 | 117307162 | G | A | . | SYNONYMOUS_STOP | CFTR | ENST00000454343 | p.*1420*/c.4260G>A | synonymous_variant | . | stop_retained_variant | ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... | nonsynonymous_variant | synonymous_variant | synonymous SNV | ENSG00000001626:ENST00000454343:exon26:c.G4260... |
188918 | ENSG00000001626 | 117355912 | A | G | . | SYNONYMOUS_STOP | CFTR | ENST00000600166 | p.*156*/c.467A>G | synonymous_variant | . | stop_retained_variant | ENST00000600166.1:c.469A>G(p.%3D)ENST000006001... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
188932 | ENSG00000001626 | 117355913 | A | G | . | SYNONYMOUS_STOP | CFTR | ENST00000600166 | p.*156*/c.468A>G | synonymous_variant | . | stop_retained_variant | ENST00000600166.1:c.470A>G(p.%3D)ENST000006001... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
4 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
24430 | ENSG00000001626 | 117144308 | TG | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X19X/c.56*>-G | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp19TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.56delG... |
24444 | ENSG00000001626 | 117144309 | GG | G | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X19X/c.57*>-G | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp19TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.57delG... |
24626 | ENSG00000001626 | 117144322 | TT | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X24X/c.70*>-T | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Leu24TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.70delT... |
24640 | ENSG00000001626 | 117144323 | TT | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X24X/c.71*>-T | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Leu24TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.71delT... |
24822 | ENSG00000001626 | 117144336 | AC | A | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X28X/c.84*>-C | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.84delC... |
214 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
28718 | ENSG00000001626 | 117149085 | C | CA | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000454343 | splicing_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
28719 | ENSG00000001626 | 117149085 | C | CAC | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000454343 | splicing_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
28720 | ENSG00000001626 | 117149085 | C | CATG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000454343 | splicing_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
28721 | ENSG00000001626 | 117149085 | C | CC | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000454343 | splicing_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
28722 | ENSG00000001626 | 117149085 | C | CG | . | SPLICE_SITE_ACCEPTOR | CFTR | ENST00000454343 | splicing_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN |
48 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136603 | ENSG00000001626 | 117267850 | GTT | G | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X189X/c.567*>-TT | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Cys190TerENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
136616 | ENSG00000001626 | 117267851 | TT | T | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X190X/c.568*>-T | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Ter191ArgENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
136617 | ENSG00000001626 | 117267851 | TTA | T | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X190X/c.568*>-TA | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Ter191AspENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
136622 | ENSG00000001626 | 117267852 | T | TA | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.*190X/c.569*>+A | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Ter191TerENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
136626 | ENSG00000001626 | 117267852 | T | TG | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.*190X/c.569*>+G | frameshift_variant | . | stop_lost | ENSP00000419254.1:p.Ter191TerENST00000468795.1... | stop_lost | ignored | unknown | UNKNOWN |
33 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136632 | ENSG00000001626 | 117267852 | TAGA | T | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.569*>-AGA | stop_lost | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_571delAGA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
136645 | ENSG00000001626 | 117267853 | AGA | A | . | FRAME_SHIFT | CFTR | ENST00000468795 | p.X190X/c.570*>-GA | frameshift_variant | . | 3_prime_UTR_variant | ENST00000468795.1:c.*1_572delGA | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
136646 | ENSG00000001626 | 117267853 | AGAC | A | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.570*>-GAC | stop_lost | . | 3_prime_UTR_variant | ENST00000468795.1:c.*2_572delGAC | 3_prime_UTR_variant | ignored | unknown | UNKNOWN |
160376 | ENSG00000001626 | 117307160 | TAGA | T | . | STOP_LOST | CFTR | ENST00000454343 | p.*1420Xext*?/c.4259*>-AGA | stop_lost | . | 3_prime_UTR_variant | ENST00000454343.1:c.*1_4259delAGA | 3_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4259_... |
160389 | ENSG00000001626 | 117307161 | AGA | A | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1420X/c.4260*>-GA | frameshift_variant | . | 3_prime_UTR_variant | ENST00000454343.1:c.*1_4260delGA | 3_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4260_... |
35 rows
0 rows
# Report variants where ANNOVAR and SnpEff agree on the normalized effect but
# VEP assigns a different one. For every effect value SnpEff produced, show up
# to five example rows followed by the total number of disagreeing rows.
html_parts = ['<h1>Other algo\'s agree, but...</h1>']
for effect in master_df["normalized_so_snpeff"].unique():
    html_parts.append("<h2> VEP doesn't match for <em>" + str(effect) + "</em></h2>")
    query = master_df.loc[(master_df["normalized_so_annovar"] == effect) &
                          (master_df["normalized_so_snpeff"] == effect) &
                          (master_df["normalized_so_vep"] != effect)]
    # len() is the true row count. DataFrame.count()[0] only counted the
    # non-null cells of the first column and relied on positional indexing
    # into a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        html_parts.append(query.head(5).to_html())
    # The count is reported even when there is nothing to show.
    html_parts.append("<p>" + str(num_rows) + " rows</p>")
sampletables = "".join(html_parts)
# Last expression of the cell, so the notebook renders it.
HTML(sampletables)
0 rows
0 rows
0 rows
0 rows
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20834 | ENSG00000001626 | 117120146 | ACCA | A | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1X/c.1*>-CCA | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
20847 | ENSG00000001626 | 117120147 | CCA | C | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1X/c.1*>-CA | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
20848 | ENSG00000001626 | 117120147 | CCAT | C | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1X/c.1*>-CAT | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
160347 | ENSG00000001626 | 117307158 | TTT | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1419X/c.4257*>-TT | frameshift_variant | . | stop_lost | ENSP00000403677.1:p.Ter1420GluENST00000454343.... | stop_lost | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4257_... |
160361 | ENSG00000001626 | 117307159 | TTA | T | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1420X/c.4258*>-TA | frameshift_variant | . | stop_lost | ENSP00000403677.1:p.Ter1420GluENST00000454343.... | stop_lost | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000454343:exon26:c.4258_... |
12 rows
0 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
24422 | ENSG00000001626 | 117144308 | T | TA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp19*/c.56*>+A | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Trp19TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.55_56i... |
24423 | ENSG00000001626 | 117144308 | T | TAG | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp19*/c.56*>+AG | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Trp19TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.55_56i... |
24435 | ENSG00000001626 | 117144309 | G | GA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Trp19*/c.57*>+A | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Trp19TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.56_57i... |
24800 | ENSG00000001626 | 117144335 | T | TA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Tyr28*/c.83*>+A | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.83dupA... |
24805 | ENSG00000001626 | 117144335 | T | TG | . | STOP_GAINED | CFTR | ENST00000454343 | p.Tyr28*/c.83*>+G | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... | frameshift_variant | stop_gained | stopgain SNV | ENSG00000001626:ENST00000003084:exon2:c.82_83i... |
399 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
160391 | ENSG00000001626 | 117307162 | G | A | . | SYNONYMOUS_STOP | CFTR | ENST00000454343 | p.*1420*/c.4260G>A | synonymous_variant | . | stop_retained_variant | ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... | nonsynonymous_variant | synonymous_variant | synonymous SNV | ENSG00000001626:ENST00000454343:exon26:c.G4260... |
1 rows
0 rows
0 rows
0 rows
0 rows
# Report variants where VEP disagrees with at least one of the other two
# annotators: for every effect value SnpEff produced, select rows where VEP
# assigns a different effect while ANNOVAR or SnpEff (or both) assign it.
# Up to five example rows are shown, followed by the total row count.
html_parts = ['<h1>At least 1 column doesn\'t match</h1>']
for effect in master_df["normalized_so_snpeff"].unique():
    html_parts.append("<h2> VEP doesn't match for <em>" + str(effect) + "</em></h2>")
    query = master_df.loc[(master_df["normalized_so_vep"] != effect) &
                          ((master_df["normalized_so_annovar"] == effect) |
                           (master_df["normalized_so_snpeff"] == effect))]
    # len() is the true row count. DataFrame.count()[0] only counted the
    # non-null cells of the first column and relied on positional indexing
    # into a label-indexed Series.
    num_rows = len(query)
    if num_rows > 0:
        html_parts.append(query.head(5).to_html())
    # The count is reported even when there is nothing to show.
    html_parts.append("<p>" + str(num_rows) + " rows</p>")
sampletables = "".join(html_parts)
# Last expression of the cell, so the notebook renders it.
HTML(sampletables)
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 117105737 | C | A | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
1 | 117105737 | C | CA | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
2 | 117105737 | C | CAG | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
3 | 117105737 | C | CC | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||||
4 | 117105737 | C | CCTG | . | INTERGENIC | intergenic_variant | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
1403 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2788 | ENSG00000001626 | 117105835 | TTTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2801 | ENSG00000001626 | 117105836 | TTT | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
2802 | ENSG00000001626 | 117105836 | TTTG | T | . | UPSTREAM | CFTR | ENST00000546407 | upstream_gene_variant | . | non_coding_exon_variant | ignored | NaN | NaN | NaN | ||
9327 | ENSG00000001626 | 117119257 | A | AA | . | INTRON | CFTR | ENST00000546407 | n.166+3396*>+A | intron_variant | . | intron_variant | ENST00000546407.1:n.166+3395dupA | intron_variant | upstream_gene_variant | upstream | NaN |
9328 | ENSG00000001626 | 117119257 | A | AAC | . | INTRON | CFTR | ENST00000546407 | n.166+3396*>+AC | intron_variant | . | intron_variant | ENST00000546407.1:n.166+3395_166+3396insAC | intron_variant | upstream_gene_variant | upstream | NaN |
1414 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2806 | ENSG00000001626 | 117105837 | T | TA | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2807 | ENSG00000001626 | 117105837 | T | TAC | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2808 | ENSG00000001626 | 117105837 | T | TC | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2809 | ENSG00000001626 | 117105837 | T | TG | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN | ||
2810 | ENSG00000001626 | 117105837 | T | TGAT | . | EXON | CFTR | ENST00000546407 | ignored | . | upstream_gene_variant | upstream_gene_variant | NaN | NaN | NaN |
17895 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
3477 | ENSG00000001626 | 117105885 | G | GA | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insA | ignored | NaN | NaN | NaN | |
3478 | ENSG00000001626 | 117105885 | G | GC | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insC | ignored | NaN | NaN | NaN | |
3479 | ENSG00000001626 | 117105885 | G | GCGA | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insCGA | ignored | NaN | NaN | NaN | |
3480 | ENSG00000001626 | 117105885 | G | GG | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48dupG | ignored | NaN | NaN | NaN | |
3481 | ENSG00000001626 | 117105885 | G | GGA | . | SPLICE_SITE_DONOR | CFTR | ENST00000546407 | splicing_variant | . | splice_region_variant | ENST00000546407.1:n.48_48+1insGA | ignored | NaN | NaN | NaN |
1041 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
25984 | ENSG00000001626 | 117144419 | TA | T | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
25985 | ENSG00000001626 | 117144419 | TAT | T | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
25986 | ENSG00000001626 | 117144419 | TATG | T | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
25987 | ENSG00000001626 | 117144420 | A | AA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN | ||
25988 | ENSG00000001626 | 117144420 | A | AC | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | intron_variant | intronic | NaN |
17889 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
10730 | ENSG00000001626 | 117119357 | T | TA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insA | intron_variant | upstream_gene_variant | upstream | NaN | |
10731 | ENSG00000001626 | 117119357 | T | TAG | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insAG | intron_variant | upstream_gene_variant | upstream | NaN | |
10732 | ENSG00000001626 | 117119357 | T | TC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insC | intron_variant | upstream_gene_variant | upstream | NaN | |
10733 | ENSG00000001626 | 117119357 | T | TG | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insG | intron_variant | upstream_gene_variant | upstream | NaN | |
10734 | ENSG00000001626 | 117119357 | T | TGC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | intron_variant | ENST00000546407.1:n.166+3495_166+3496insGC | intron_variant | upstream_gene_variant | upstream | NaN |
25 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20834 | ENSG00000001626 | 117120146 | ACCA | A | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1X/c.1*>-CCA | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
20847 | ENSG00000001626 | 117120147 | CCA | C | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.X1X/c.1*>-CA | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
20848 | ENSG00000001626 | 117120147 | CCAT | C | . | FRAME_SHIFT | CFTR | ENST00000426809 | p.X1X/c.1*>-CAT | frameshift_variant | . | 5_prime_UTR_variant | 5_prime_UTR_variant | frameshift_variant | frameshift deletion | ENSG00000001626:ENST00000426809:wholegene,ENSG... | |
20850 | ENSG00000001626 | 117120148 | C | CA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+A | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insA | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20852 | ENSG00000001626 | 117120148 | C | CC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Met1X/c.1*>+C | frameshift_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1dupC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
217 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20851 | ENSG00000001626 | 117120148 | C | CACT | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+ACT | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insACT | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
20857 | ENSG00000001626 | 117120148 | C | CTGC | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Met1X/c.1*>+TGC | inframe_variant | . | 5_prime_UTR_variant | ENST00000454343.1:c.-1_1insTGC | 5_prime_UTR_variant | 5_prime_UTR_variant | UTR5 | NaN |
21380 | ENSG00000001626 | 117120185 | TCCA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X13*/c.38*>-CCA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon1:c.38_40d... |
24810 | ENSG00000001626 | 117144335 | TACA | T | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.83*>-ACA | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.83_85d... |
24824 | ENSG00000001626 | 117144336 | ACAG | A | . | STOP_GAINED | CFTR | ENST00000454343 | p.X28*/c.84*>-CAG | stop_gained | . | stop_gained | ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... | stop_gained | inframe_variant | nonframeshift deletion | ENSG00000001626:ENST00000003084:exon2:c.84_86d... |
124 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
20939 | ENSG00000001626 | 117120154 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
20940 | ENSG00000001626 | 117120154 | G | GTA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Arg3*/c.7*>+TA | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.6_7ins... |
21109 | ENSG00000001626 | 117120166 | G | GT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Glu7*/c.19*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.18_19i... |
21148 | ENSG00000001626 | 117120169 | A | AT | . | STOP_GAINED | CFTR | ENST00000454343 | p.Lys8*/c.22*>+T | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.21_22i... |
21149 | ENSG00000001626 | 117120169 | A | ATA | . | STOP_GAINED | CFTR | ENST00000454343 | p.Lys8*/c.22*>+TA | stop_gained | . | frameshift_variant | ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... | frameshift_variant | frameshift_variant | frameshift insertion | ENSG00000001626:ENST00000003084:exon1:c.21_22i... |
1262 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136647 | ENSG00000001626 | 117267854 | G | A | . | SYNONYMOUS_STOP | CFTR | ENST00000468795 | p.*190*/c.570G>A | synonymous_variant | . | stop_retained_variant | ENST00000468795.1:c.572G>A(p.%3D)ENST000004687... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
160391 | ENSG00000001626 | 117307162 | G | A | . | SYNONYMOUS_STOP | CFTR | ENST00000454343 | p.*1420*/c.4260G>A | synonymous_variant | . | stop_retained_variant | ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... | nonsynonymous_variant | synonymous_variant | synonymous SNV | ENSG00000001626:ENST00000454343:exon26:c.G4260... |
188918 | ENSG00000001626 | 117355912 | A | G | . | SYNONYMOUS_STOP | CFTR | ENST00000600166 | p.*156*/c.467A>G | synonymous_variant | . | stop_retained_variant | ENST00000600166.1:c.469A>G(p.%3D)ENST000006001... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
188932 | ENSG00000001626 | 117355913 | A | G | . | SYNONYMOUS_STOP | CFTR | ENST00000600166 | p.*156*/c.468A>G | synonymous_variant | . | stop_retained_variant | ENST00000600166.1:c.470A>G(p.%3D)ENST000006001... | nonsynonymous_variant | ignored | unknown | UNKNOWN |
4 rows
0 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
136608 | ENSG00000001626 | 117267851 | T | TA | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+A | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191IlefsTer6ENST0000046... | frameshift_variant | ignored | unknown | UNKNOWN |
136609 | ENSG00000001626 | 117267851 | T | TC | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+C | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191LeufsTer6ENST0000046... | frameshift_variant | ignored | unknown | UNKNOWN |
136610 | ENSG00000001626 | 117267851 | T | TCT | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+CT | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191LeufsTer11ENST000004... | frameshift_variant | ignored | unknown | UNKNOWN |
136611 | ENSG00000001626 | 117267851 | T | TG | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+G | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191ValfsTer6ENST0000046... | frameshift_variant | ignored | unknown | UNKNOWN |
136612 | ENSG00000001626 | 117267851 | T | TGA | . | STOP_LOST | CFTR | ENST00000468795 | p.*190Xext*?/c.568*>+GA | stop_lost | . | frameshift_variant | ENSP00000419254.1:p.Ter191AspfsTer11ENST000004... | frameshift_variant | ignored | unknown | UNKNOWN |
24 rows
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
176140 | ENSG00000001626 | 117350702 | TGAT | T | . | INTRON | CFTR | ENST00000600166 | c.367-5109*>-GAT | intron_variant | . | intron_variant | ENST00000429014.1:n.210-5109_210-5107delGAT | intron_variant | 3_prime_UTR_variant | UTR3 | NaN |
176153 | ENSG00000001626 | 117350703 | GAT | G | . | INTRON | CFTR | ENST00000600166 | c.367-5108*>-AT | intron_variant | . | intron_variant | ENST00000429014.1:n.210-5108_210-5107delAT | intron_variant | 3_prime_UTR_variant | UTR3 | NaN |
176154 | ENSG00000001626 | 117350703 | GATT | G | . | INTRON | CFTR | ENST00000600166 | c.367-5108*>-ATT | intron_variant | . | intron_variant | ENST00000429014.1:n.210-5108_210-5106delATT | intron_variant | 3_prime_UTR_variant | UTR3 | NaN |
176167 | ENSG00000001626 | 117350704 | ATT | A | . | INTRON | CFTR | ENST00000600166 | c.367-5107*>-TT | intron_variant | . | intron_variant | ENST00000429014.1:n.210-5107_210-5106delTT | intron_variant | 3_prime_UTR_variant | UTR3 | NaN |
176168 | ENSG00000001626 | 117350704 | ATTT | A | . | INTRON | CFTR | ENST00000600166 | c.367-5107*>-TTT | intron_variant | . | intron_variant | ENST00000429014.1:n.210-5107_210-5105delTTT | intron_variant | 3_prime_UTR_variant | UTR3 | NaN |
34 rows
0 rows
# First 50 variants that ANNOVAR alone labels as splicing: the ANNOVAR column
# equals "splicing_variant" while both the SnpEff and VEP columns differ.
master_df.loc[
    master_df["normalized_so_annovar"].eq("splicing_variant")
    & master_df["normalized_so_snpeff"].ne("splicing_variant")
    & master_df["normalized_so_vep"].ne("splicing_variant")
].head(n=50)
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
11346 | ENSG00000001626 | 117119401 | T | TA | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+A | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insA | intron_variant | splicing_variant | splicing | NaN |
11347 | ENSG00000001626 | 117119401 | T | TAT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+AT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insAT | intron_variant | splicing_variant | splicing | NaN |
11348 | ENSG00000001626 | 117119401 | T | TC | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+C | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insC | intron_variant | splicing_variant | splicing | NaN |
11349 | ENSG00000001626 | 117119401 | T | TCGT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+CGT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insCGT | intron_variant | splicing_variant | splicing | NaN |
11350 | ENSG00000001626 | 117119401 | T | TG | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+G | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insG | intron_variant | splicing_variant | splicing | NaN |
11351 | ENSG00000001626 | 117119401 | T | TGCT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+GCT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2_-424+3insGCT | intron_variant | splicing_variant | splicing | NaN |
11352 | ENSG00000001626 | 117119401 | T | TGT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+GT | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+1_-424+2dupGT | intron_variant | splicing_variant | splicing | NaN |
11353 | ENSG00000001626 | 117119401 | T | TT | . | INTRON | CFTR | ENST00000446805 | c.-424+3*>+T | intron_variant | . | intron_variant | ENST00000446805.1:c.-424+2dupT | intron_variant | splicing_variant | splicing | NaN |
12941 | ENSG00000001626 | 117119515 | G | GA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insA | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12942 | ENSG00000001626 | 117119515 | G | GAT | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insAT | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12943 | ENSG00000001626 | 117119515 | G | GC | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insC | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12944 | ENSG00000001626 | 117119515 | G | GCAT | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insCAT | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12945 | ENSG00000001626 | 117119515 | G | GCGA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insCGA | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12946 | ENSG00000001626 | 117119515 | G | GG | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1dupG | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12947 | ENSG00000001626 | 117119515 | G | GGA | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insGA | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
12948 | ENSG00000001626 | 117119515 | G | GT | . | UTR_5_PRIME | CFTR | ENST00000446805 | 5_prime_UTR_variant | . | 5_prime_UTR_variant | ENST00000446805.1:c.-423-1_-423insT | 5_prime_UTR_variant | splicing_variant | splicing | NaN | |
16232 | ENSG00000001626 | 117119750 | T | TA | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+A | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insA | intron_variant | splicing_variant | splicing | NaN |
16233 | ENSG00000001626 | 117119750 | T | TC | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+C | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insC | intron_variant | splicing_variant | splicing | NaN |
16234 | ENSG00000001626 | 117119750 | T | TCA | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+CA | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insCA | intron_variant | splicing_variant | splicing | NaN |
16235 | ENSG00000001626 | 117119750 | T | TCTA | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+CTA | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insCTA | intron_variant | splicing_variant | splicing | NaN |
16236 | ENSG00000001626 | 117119750 | T | TG | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+G | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insG | intron_variant | splicing_variant | splicing | NaN |
16237 | ENSG00000001626 | 117119750 | T | TGC | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+GC | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insGC | intron_variant | splicing_variant | splicing | NaN |
16238 | ENSG00000001626 | 117119750 | T | TT | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+T | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2dupT | intron_variant | splicing_variant | splicing | NaN |
16239 | ENSG00000001626 | 117119750 | T | TTCG | . | INTRON | CFTR | ENST00000446805 | c.-191+3*>+TCG | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+2_-191+3insTCG | intron_variant | splicing_variant | splicing | NaN |
21622 | ENSG00000001626 | 117120203 | T | TA | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+A | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insA | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->A,ENST0000000308... |
21623 | ENSG00000001626 | 117120203 | T | TAC | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+AC | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insAC | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->AC,ENST000000030... |
21624 | ENSG00000001626 | 117120203 | T | TAGC | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+AGC | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insAGC | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->AGC,ENST00000003... |
21625 | ENSG00000001626 | 117120203 | T | TC | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+C | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insC | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->C,ENST0000000308... |
21626 | ENSG00000001626 | 117120203 | T | TG | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+G | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insG | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->G,ENST0000000308... |
21627 | ENSG00000001626 | 117120203 | T | TGC | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+GC | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insGC | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->GC,ENST000000030... |
21628 | ENSG00000001626 | 117120203 | T | TGCT | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+GCT | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455_-191+456insGCT | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->GCT,ENST00000003... |
21629 | ENSG00000001626 | 117120203 | T | TT | . | INTRON | CFTR | ENST00000446805 | c.-191+456*>+T | intron_variant | . | intron_variant | ENST00000446805.1:c.-191+455dupT | intron_variant | splicing_variant | splicing | ENST00000454343:exon1:c.53+2->T,ENST0000000308... |
24393 | ENSG00000001626 | 117144306 | G | GA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+A | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->A,ENST0000000308... |
24394 | ENSG00000001626 | 117144306 | G | GAG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+AG | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->AG,ENST000000030... |
24395 | ENSG00000001626 | 117144306 | G | GAT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+AT | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->AT,ENST000000030... |
24396 | ENSG00000001626 | 117144306 | G | GC | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+C | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->C,ENST0000000308... |
24397 | ENSG00000001626 | 117144306 | G | GCGA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CGA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CGA,ENST00000003... |
24398 | ENSG00000001626 | 117144306 | G | GCTA | . | CODON_INSERTION | CFTR | ENST00000454343 | p.Ser18X/c.54*>+CTA | inframe_variant | . | inframe_insertion | ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... | inframe_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->CTA,ENST00000003... |
24399 | ENSG00000001626 | 117144306 | G | GG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+G | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->G,ENST0000000308... |
24400 | ENSG00000001626 | 117144306 | G | GT | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Ser18X/c.54*>+T | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon2:c.54-1->T,ENST0000000308... |
25976 | ENSG00000001626 | 117144419 | T | TA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->A,ENST000000030... | ||
25977 | ENSG00000001626 | 117144419 | T | TATG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->ATG,ENST0000000... | ||
25978 | ENSG00000001626 | 117144419 | T | TC | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->C,ENST000000030... | ||
25979 | ENSG00000001626 | 117144419 | T | TCGA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->CGA,ENST0000000... | ||
25980 | ENSG00000001626 | 117144419 | T | TG | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->G,ENST000000030... | ||
25981 | ENSG00000001626 | 117144419 | T | TGC | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->GC,ENST00000003... | ||
25982 | ENSG00000001626 | 117144419 | T | TT | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->T,ENST000000030... | ||
25983 | ENSG00000001626 | 117144419 | T | TTA | . | DOWNSTREAM | CFTR | ENST00000546407 | downstream_gene_variant | . | downstream_gene_variant | downstream_gene_variant | splicing_variant | splicing | ENST00000454343:exon2:c.164+2->TA,ENST00000003... | ||
28747 | ENSG00000001626 | 117149087 | G | GA | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Arg55X/c.165*>+A | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Glu56ArgfsTer4ENST00000426... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon3:c.165-1->A,ENST000000030... |
28748 | ENSG00000001626 | 117149087 | G | GAG | . | FRAME_SHIFT | CFTR | ENST00000454343 | p.Arg55X/c.165*>+AG | frameshift_variant | . | frameshift_variant | ENSP00000389119.1:p.Trp57AsnfsTer35ENST0000042... | frameshift_variant | splicing_variant | splicing | ENST00000454343:exon3:c.165-1->AG,ENST00000003... |
50 rows × 17 columns
# Last few rows where ANNOVAR's normalized term is "splicing_variant" but
# snpEff's normalized term is anything else.
master_df[
    master_df["normalized_so_annovar"].eq("splicing_variant")
    & master_df["normalized_so_snpeff"].ne("splicing_variant")
].tail()
Gene | POS | REF | ALT | ID_x | Effect | Gene_Name | Transcript_ID | hgvs_snpeff | normalized_so_snpeff | ID_y | Consequence | hgvs_vep | normalized_so_vep | normalized_so_annovar | combined_effect | hgvs | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
249128 | ENSG00000083622 | 117282491 | G | GG | . | INTRON | AC000111.6 | ENST00000456270 | n.65+4913*>+C | intron_variant | . | intron_variant | ENST00000456270.1:n.65+4913dupC | intron_variant | splicing_variant | splicing | ENST00000454343:exon22:c.3535-1->G,ENST0000000... |
249129 | ENSG00000083622 | 117282491 | G | GGA | . | INTRON | AC000111.6 | ENST00000456270 | n.65+4913*>+CT | intron_variant | . | intron_variant | ENST00000456270.1:n.65+4913_65+4914insTC | intron_variant | splicing_variant | splicing | ENST00000454343:exon22:c.3535-1->GA,ENST000000... |
249131 | ENSG00000083622 | 117282491 | G | GGT | . | INTRON | AC000111.6 | ENST00000456270 | n.65+4913*>+CA | intron_variant | . | intron_variant | ENST00000456270.1:n.65+4912_65+4913dupAC | intron_variant | splicing_variant | splicing | ENST00000454343:exon22:c.3535-1->GT,ENST000000... |
249132 | ENSG00000083622 | 117282491 | G | GT | . | INTRON | AC000111.6 | ENST00000456270 | n.65+4913*>+A | intron_variant | . | intron_variant | ENST00000456270.1:n.65+4913_65+4914insA | intron_variant | splicing_variant | splicing | ENST00000454343:exon22:c.3535-1->T,ENST0000000... |
249133 | ENSG00000083622 | 117282491 | G | T | . | INTRON | AC000111.6 | ENST00000456270 | n.65+4914C>A | intron_variant | . | intron_variant | ENST00000456270.1:n.65+4914C>A | intron_variant | splicing_variant | splicing | ENST00000454343:exon22:c.3535-1G>T,ENST0000000... |
5 rows × 17 columns