In [1]:
from IPython.display import HTML
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
import seaborn
%pylab inline
Populating the interactive namespace from numpy and matplotlib
In [2]:
#These are defined by the way annovar defines precedence. I found empirically that stop_gain > frame_shift in annovar, hence the reverse
precedence_dict = {
"splicing_variant": 1,
"frameshift_variant": 4,
"stop_gained": 2,
"stop_lost": 3,
"inframe_variant": 5,
"nonsynonymous_variant": 6,
"synonymous_variant": 7,
"5_prime_UTR_variant": 8,
"3_prime_UTR_variant": 9,
"intron_variant": 10,
"upstream_gene_variant": 11,
"downstream_gene_variant": 12,
"intergenic_variant": 13,
"intron_variant": 14,
"upstream_gene_variant": 15,
"regulatory_region_variant": 16,
"ignored": 17
}

def ranked(col):
    return max(col, key=lambda val: -1*precedence_dict[val])
In [3]:
with pd.get_store('classified_variant_store.h5') as store:
    snpeff_subset = store.get("cftr_snpeff_ensembl_subset")
ensembl_symbol_mapping = {"CFTR":"ENSG00000001626",
                          "AC000111.3": "ENSG00000232661",
                          "AC000111.4":"ENSG00000237974",
                          "AC000111.5": "ENSG00000234001",
                          "AC000111.6": "ENSG00000083622", 
                          "CTTNBP2": "ENSG00000077063",
                          "":""}
snpeff_subset["EnsemblGene"] = snpeff_subset["Gene_Name"].apply(lambda x: ensembl_symbol_mapping[x])
grouped_snpeff_subset = snpeff_subset.groupby(["EnsemblGene", "POS", "REF", "ALT"])
grouped_snpeff_subset = grouped_snpeff_subset.agg({"normalized_so_snpeff": ranked})
grouped_snpeff_subset = grouped_snpeff_subset.rename(columns={"normalized_so_snpeff": "normalized_so_snpeff_max"}).reset_index()
grouped_snpeff_subset = pd.merge(grouped_snpeff_subset, snpeff_subset, how="left", on=["POS", "REF", "ALT", "EnsemblGene"])
grouped_snpeff_subset = grouped_snpeff_subset[grouped_snpeff_subset["normalized_so_snpeff_max"] == grouped_snpeff_subset["normalized_so_snpeff"]]
#kludge ties are broken by taking the first element in the group (ie randomly; this should only really effect the transcript level comparisons, ie hgvs etc)
grouped_snpeff_subset = grouped_snpeff_subset.groupby(["EnsemblGene", "POS", "REF", "ALT"]).first()
agg_snpeff = grouped_snpeff_subset.reset_index()
del agg_snpeff["normalized_so_snpeff_max"]
del grouped_snpeff_subset
del snpeff_subset
agg_snpeff.rename(columns={"EnsemblGene":"Gene"}, inplace=True)
agg_snpeff[100000:100050]
Out[3]:
Gene POS REF ALT ID Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff
100000 ENSG00000001626 117235027 C G . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Thr784Arg/c.2351C>G nonsynonymous_variant
100001 ENSG00000001626 117235027 C T . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Thr784Ile/c.2351C>T nonsynonymous_variant
100002 ENSG00000001626 117235027 CA C . FRAME_SHIFT CFTR ENST00000454343 p.X784X/c.2352*>-A frameshift_variant
100003 ENSG00000001626 117235027 CAT C . FRAME_SHIFT CFTR ENST00000454343 p.X784X/c.2352*>-AT frameshift_variant
100004 ENSG00000001626 117235027 CATG C . CODON_CHANGE_PLUS_CODON_DELETION CFTR ENST00000454343 p.X784Thr/c.2352*>-ATG inframe_variant
100005 ENSG00000001626 117235028 A AA . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+A frameshift_variant
100006 ENSG00000001626 117235028 A AATC . CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2353*>+ATC inframe_variant
100007 ENSG00000001626 117235028 A AC . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+C frameshift_variant
100008 ENSG00000001626 117235028 A ACA . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+CA frameshift_variant
100009 ENSG00000001626 117235028 A ACAG . CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2353*>+CAG inframe_variant
100010 ENSG00000001626 117235028 A ACG . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+CG frameshift_variant
100011 ENSG00000001626 117235028 A AG . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+G frameshift_variant
100012 ENSG00000001626 117235028 A AT . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2353*>+T frameshift_variant
100013 ENSG00000001626 117235028 A C . SYNONYMOUS_CODING CFTR ENST00000454343 p.Thr784Thr/c.2352A>C synonymous_variant
100014 ENSG00000001626 117235028 A G . SYNONYMOUS_CODING CFTR ENST00000454343 p.Thr784Thr/c.2352A>G synonymous_variant
100015 ENSG00000001626 117235028 A T . SYNONYMOUS_CODING CFTR ENST00000454343 p.Thr784Thr/c.2352A>T synonymous_variant
100016 ENSG00000001626 117235028 AT A . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2353*>-T frameshift_variant
100017 ENSG00000001626 117235028 ATG A . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2353*>-TG frameshift_variant
100018 ENSG00000001626 117235028 ATGG A . CODON_DELETION CFTR ENST00000454343 p.Trp785X/c.2353*>-TGG inframe_variant
100019 ENSG00000001626 117235029 T A . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Arg/c.2353T>A nonsynonymous_variant
100020 ENSG00000001626 117235029 T C . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Arg/c.2353T>C nonsynonymous_variant
100021 ENSG00000001626 117235029 T G . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Gly/c.2353T>G nonsynonymous_variant
100022 ENSG00000001626 117235029 T TA . STOP_GAINED CFTR ENST00000454343 p.Trp785*/c.2354*>+A stop_gained
100023 ENSG00000001626 117235029 T TC . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2354*>+C frameshift_variant
100024 ENSG00000001626 117235029 T TG . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2354*>+G frameshift_variant
100025 ENSG00000001626 117235029 T TGCT . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2354*>+GCT inframe_variant
100026 ENSG00000001626 117235029 T TT . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2354*>+T frameshift_variant
100027 ENSG00000001626 117235029 T TTC . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2354*>+TC frameshift_variant
100028 ENSG00000001626 117235029 T TTG . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2354*>+TG frameshift_variant
100029 ENSG00000001626 117235029 T TTGA . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2354*>+TGA inframe_variant
100030 ENSG00000001626 117235029 TG T . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2354*>-G frameshift_variant
100031 ENSG00000001626 117235029 TGG T . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2354*>-GG frameshift_variant
100032 ENSG00000001626 117235029 TGGA T . CODON_CHANGE_PLUS_CODON_DELETION CFTR ENST00000454343 p.X785Tyr/c.2354*>-GGA inframe_variant
100033 ENSG00000001626 117235030 G A . STOP_GAINED CFTR ENST00000454343 p.Trp785*/c.2354G>A stop_gained
100034 ENSG00000001626 117235030 G C . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Ser/c.2354G>C nonsynonymous_variant
100035 ENSG00000001626 117235030 G GA . STOP_GAINED CFTR ENST00000454343 p.Trp785*/c.2355*>+A stop_gained
100036 ENSG00000001626 117235030 G GAT . STOP_GAINED CFTR ENST00000454343 p.Trp785*/c.2355*>+AT stop_gained
100037 ENSG00000001626 117235030 G GC . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2355*>+C frameshift_variant
100038 ENSG00000001626 117235030 G GG . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2355*>+G frameshift_variant
100039 ENSG00000001626 117235030 G GGC . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2355*>+GC frameshift_variant
100040 ENSG00000001626 117235030 G GT . FRAME_SHIFT CFTR ENST00000454343 p.Trp785X/c.2355*>+T frameshift_variant
100041 ENSG00000001626 117235030 G GTGA . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2355*>+TGA inframe_variant
100042 ENSG00000001626 117235030 G GTGC . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Trp785X/c.2355*>+TGC inframe_variant
100043 ENSG00000001626 117235030 G T . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Leu/c.2354G>T nonsynonymous_variant
100044 ENSG00000001626 117235030 GG G . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2355*>-G frameshift_variant
100045 ENSG00000001626 117235030 GGA G . FRAME_SHIFT CFTR ENST00000454343 p.X785X/c.2355*>-GA frameshift_variant
100046 ENSG00000001626 117235030 GGAA G . CODON_CHANGE_PLUS_CODON_DELETION CFTR ENST00000454343 p.X785Cys/c.2355*>-GAA inframe_variant
100047 ENSG00000001626 117235031 G A . STOP_GAINED CFTR ENST00000454343 p.Trp785*/c.2355G>A stop_gained
100048 ENSG00000001626 117235031 G C . NON_SYNONYMOUS_CODING CFTR ENST00000454343 p.Trp785Cys/c.2355G>C nonsynonymous_variant
100049 ENSG00000001626 117235031 G GA . FRAME_SHIFT CFTR ENST00000454343 p.Asn786X/c.2356*>+A frameshift_variant

50 rows × 10 columns

In [4]:
with pd.get_store('classified_variant_store.h5') as store:
    vep_subset = store.get("cftr_vep_ensembl_subset")
del vep_subset["Feature"]
vep_subset.drop_duplicates(inplace=True)
grouped_vep_subset = vep_subset.groupby(["Gene", "POS", "REF", "ALT"])
grouped_vep_subset = grouped_vep_subset.agg({"normalized_so_vep": ranked})
grouped_vep_subset = grouped_vep_subset.rename(columns={"normalized_so_vep": "normalized_so_vep_max"}).reset_index()
grouped_vep_subset = pd.merge(grouped_vep_subset, vep_subset, how="left", on=["POS", "REF", "ALT", "Gene"])
grouped_vep_subset = grouped_vep_subset[grouped_vep_subset["normalized_so_vep_max"] == grouped_vep_subset["normalized_so_vep"]]
grouped_vep_subset = grouped_vep_subset.groupby(["Gene", "POS", "REF", "ALT"]).first()
agg_vep = grouped_vep_subset.reset_index()
del grouped_vep_subset
del vep_subset
del agg_vep["normalized_so_vep_max"]
agg_vep[80000:80023]
Out[4]:
Gene POS REF ALT ID Consequence hgvs_vep normalized_so_vep
80000 ENSG00000001626 117188850 G GTC . frameshift_variant ENSP00000389119.1:p.Val426SerfsTer14ENST000004... frameshift_variant
80001 ENSG00000001626 117188850 G GTGA . stop_gained ENSP00000389119.1:p.Ala425_Val426insTerENST000... stop_gained
80002 ENSG00000001626 117188850 G T . synonymous_variant ENST00000426809.1:c.1275G>T(p.%3D)ENST00000426... synonymous_variant
80003 ENSG00000001626 117188850 GG G . frameshift_variant ENSP00000389119.1:p.Val426LeufsTer13ENST000004... frameshift_variant
80004 ENSG00000001626 117188850 GGT G . frameshift_variant ENSP00000389119.1:p.Val426CysfsTer25ENST000004... frameshift_variant
80005 ENSG00000001626 117188850 GGTT G . inframe_deletion ENSP00000389119.1:p.Val426delENST00000426809.1... inframe_variant
80006 ENSG00000001626 117188851 G A . missense_variant ENSP00000389119.1:p.Val426IleENST00000426809.1... nonsynonymous_variant
80007 ENSG00000001626 117188851 G C . missense_variant ENSP00000389119.1:p.Val426LeuENST00000426809.1... nonsynonymous_variant
80008 ENSG00000001626 117188851 G GA . frameshift_variant ENSP00000389119.1:p.Val426AspfsTer26ENST000004... frameshift_variant
80009 ENSG00000001626 117188851 G GAC . frameshift_variant ENSP00000389119.1:p.Val426AspfsTer14ENST000004... frameshift_variant
80010 ENSG00000001626 117188851 G GAGT . inframe_insertion ENSP00000389119.1:p.Val426delinsGluPheENST0000... inframe_variant
80011 ENSG00000001626 117188851 G GAT . frameshift_variant ENSP00000389119.1:p.Val426AspfsTer14ENST000004... frameshift_variant
80012 ENSG00000001626 117188851 G GC . frameshift_variant ENSP00000389119.1:p.Val426AlafsTer26ENST000004... frameshift_variant
80013 ENSG00000001626 117188851 G GG . frameshift_variant ENSP00000389119.1:p.Val426GlyfsTer26ENST000004... frameshift_variant
80014 ENSG00000001626 117188851 G GGTA . inframe_insertion ENSP00000389119.1:p.Val426delinsGlyIleENST0000... inframe_variant
80015 ENSG00000001626 117188851 G GT . frameshift_variant ENSP00000389119.1:p.Ala427CysfsTer25ENST000004... frameshift_variant
80016 ENSG00000001626 117188851 G T . missense_variant ENSP00000389119.1:p.Val426PheENST00000426809.1... nonsynonymous_variant
80017 ENSG00000001626 117188851 GT G . frameshift_variant ENSP00000389119.1:p.Ala427LeufsTer12ENST000004... frameshift_variant
80018 ENSG00000001626 117188851 GTT G . frameshift_variant ENSP00000389119.1:p.Val426GlyfsTer25ENST000004... frameshift_variant
80019 ENSG00000001626 117188851 GTTG G . inframe_deletion ENSP00000389119.1:p.Val426delENST00000426809.1... inframe_variant
80020 ENSG00000001626 117188852 T A . missense_variant ENSP00000389119.1:p.Val426AspENST00000426809.1... nonsynonymous_variant
80021 ENSG00000001626 117188852 T C . missense_variant ENSP00000389119.1:p.Val426AlaENST00000426809.1... nonsynonymous_variant
80022 ENSG00000001626 117188852 T G . missense_variant ENSP00000389119.1:p.Val426GlyENST00000426809.1... nonsynonymous_variant

23 rows × 8 columns

In [5]:
with pd.get_store('classified_variant_store.h5') as store:
    annovar_subset = store.get("cftr_annovar_ensembl_subset")
grouped_annovar_subset = annovar_subset.groupby(["Gene", "POS", "REF", "ALT"])
agg_annovar = grouped_annovar_subset.agg({"normalized_so_annovar": ranked}).reset_index()
del annovar_subset['normalized_so_annovar']
agg_annovar = pd.merge(agg_annovar, annovar_subset, on=["Gene", "POS", "REF", "ALT"])
del grouped_annovar_subset
del annovar_subset
agg_annovar[2000:2050]
Out[5]:
Gene POS REF ALT normalized_so_annovar combined_effect hgvs
2000 ENSG00000001626 117119399 GGT G splicing_variant splicing NaN
2001 ENSG00000001626 117119399 GGTA G splicing_variant splicing NaN
2002 ENSG00000001626 117119400 G A splicing_variant splicing NaN
2003 ENSG00000001626 117119400 G C splicing_variant splicing NaN
2004 ENSG00000001626 117119400 G GA splicing_variant splicing NaN
2005 ENSG00000001626 117119400 G GAGC splicing_variant splicing NaN
2006 ENSG00000001626 117119400 G GC splicing_variant splicing NaN
2007 ENSG00000001626 117119400 G GG splicing_variant splicing NaN
2008 ENSG00000001626 117119400 G GGCA splicing_variant splicing NaN
2009 ENSG00000001626 117119400 G GT splicing_variant splicing NaN
2010 ENSG00000001626 117119400 G GTA splicing_variant splicing NaN
2011 ENSG00000001626 117119400 G GTG splicing_variant splicing NaN
2012 ENSG00000001626 117119400 G T splicing_variant splicing NaN
2013 ENSG00000001626 117119400 GT G splicing_variant splicing NaN
2014 ENSG00000001626 117119400 GTA G splicing_variant splicing NaN
2015 ENSG00000001626 117119400 GTAA G splicing_variant splicing NaN
2016 ENSG00000001626 117119401 T A splicing_variant splicing NaN
2017 ENSG00000001626 117119401 T C splicing_variant splicing NaN
2018 ENSG00000001626 117119401 T G splicing_variant splicing NaN
2019 ENSG00000001626 117119401 T TA splicing_variant splicing NaN
2020 ENSG00000001626 117119401 T TAT splicing_variant splicing NaN
2021 ENSG00000001626 117119401 T TC splicing_variant splicing NaN
2022 ENSG00000001626 117119401 T TCGT splicing_variant splicing NaN
2023 ENSG00000001626 117119401 T TG splicing_variant splicing NaN
2024 ENSG00000001626 117119401 T TGCT splicing_variant splicing NaN
2025 ENSG00000001626 117119401 T TGT splicing_variant splicing NaN
2026 ENSG00000001626 117119401 T TT splicing_variant splicing NaN
2027 ENSG00000001626 117119401 TA T intron_variant intronic NaN
2028 ENSG00000001626 117119401 TAA T intron_variant intronic NaN
2029 ENSG00000001626 117119401 TAAA T intron_variant intronic NaN
2030 ENSG00000001626 117119402 A AA intron_variant intronic NaN
2031 ENSG00000001626 117119402 A AAG intron_variant intronic NaN
2032 ENSG00000001626 117119402 A AAGT intron_variant intronic NaN
2033 ENSG00000001626 117119402 A AC intron_variant intronic NaN
2034 ENSG00000001626 117119402 A ACAG intron_variant intronic NaN
2035 ENSG00000001626 117119402 A AG intron_variant intronic NaN
2036 ENSG00000001626 117119402 A AGT intron_variant intronic NaN
2037 ENSG00000001626 117119402 A AT intron_variant intronic NaN
2038 ENSG00000001626 117119402 A C intron_variant intronic NaN
2039 ENSG00000001626 117119402 A G intron_variant intronic NaN
2040 ENSG00000001626 117119402 A T intron_variant intronic NaN
2041 ENSG00000001626 117119402 AA A intron_variant intronic NaN
2042 ENSG00000001626 117119402 AAA A intron_variant intronic NaN
2043 ENSG00000001626 117119402 AAAT A intron_variant intronic NaN
2044 ENSG00000001626 117119403 A AA intron_variant intronic NaN
2045 ENSG00000001626 117119403 A AC intron_variant intronic NaN
2046 ENSG00000001626 117119403 A ACAG intron_variant intronic NaN
2047 ENSG00000001626 117119403 A ACT intron_variant intronic NaN
2048 ENSG00000001626 117119403 A ACTG intron_variant intronic NaN
2049 ENSG00000001626 117119403 A AG intron_variant intronic NaN

50 rows × 7 columns

In [6]:
vc_snpeff = agg_snpeff.groupby(["normalized_so_snpeff"]).size()
vc_snpeff.name = "SNPeff"
vc_vep = agg_vep.groupby(["normalized_so_vep"]).size()
vc_vep.name = "VEP"
vc_annovar = agg_annovar.groupby(["normalized_so_annovar"]).size()
vc_annovar.name = "Annovar"
vc_df = pd.DataFrame([vc_snpeff, vc_vep, vc_annovar])
vc_df.transpose().plot(kind="barh", fontsize=13, figsize=(16,8))
Out[6]:
<matplotlib.axes.AxesSubplot at 0x11ff2cb90>
In [7]:
master_df = pd.merge(agg_snpeff, agg_vep, how="outer", on=["Gene", "POS", "REF", "ALT"])
master_df = pd.merge(master_df, agg_annovar, how="outer", on=["Gene", "POS", "REF", "ALT"])

Determining a concordance value

We just want to find the ratio of rows where all three algorithms report the same effect vs the total number of rows.

Unfortunately, in the case of coding variants (the second calculation) we can't use the total number of rows in the master table for the denominator. Instead, we use the total number of unique rows where at least one algorithm uses the one of the effect in the list of coding effect.

In [19]:
#Number of matching
all_count = 0
for effect in precedence_dict.keys():
    all_count += master_df[(master_df["normalized_so_vep"] == effect) & 
                           (master_df["normalized_so_snpeff"] == effect) &
                           (master_df["normalized_so_annovar"] == effect)].count()
num_matching = all_count["POS"]
num_total = master_df.count()["POS"]
print num_matching
print num_total
print "Percent matching: " + str(100.0*num_matching/num_total)

all_count = 0
total_count = 0
#list of coding level effects
effects = [eff for eff, priority in precedence_dict.iteritems() if priority < 8]

total_count = master_df[master_df["normalized_so_vep"].isin(effects) | 
                        master_df["normalized_so_snpeff"].isin(effects) |
                        master_df["normalized_so_annovar"].isin(effects)].count()
for effect in effects:
    all_count += master_df[(master_df["normalized_so_vep"] == effect) & 
                           (master_df["normalized_so_snpeff"] == effect) &
                           (master_df["normalized_so_annovar"] == effect)].count()
num_matching = all_count["POS"]
num_total = total_count["POS"]
print num_matching
print num_total
print "Percent matching: " + str(100.0*num_matching/num_total)                                  
140859
287071
Percent matching: 49.0676522533
61688
66418
Percent matching: 92.8784365684
In [49]:
for effect in master_df["normalized_so_snpeff"].unique():
    vep_effect = master_df[master_df["normalized_so_vep"] == effect]
    annovar_effect = master_df[master_df["normalized_so_annovar"] == effect]
    snpeff_effect = master_df[master_df["normalized_so_snpeff"] == effect]
    fig = plt.figure(figsize=(10,10), dpi=300)
    fig.suptitle(effect, fontsize=14, fontweight='bold')
    v = venn3_unweighted([set(vep_effect.index.values), set(snpeff_effect.index.values), set(annovar_effect.index.values)], set_labels=("VEP", "SNPeff", "Annovar"))
    plt.plot(fontsize=24)
In [50]:
sampletables = '<h1>Other algo\'s agree, but...</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> Annovar doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_annovar"]!=effect) & 
                                  (master_df["normalized_so_snpeff"]==effect) & 
                                  (master_df["normalized_so_vep"]==effect)]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.head(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[50]:

Other algo's agree, but...

Annovar doesn't match for intergenic_variant

0 rows

Annovar doesn't match for upstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
1403 ENSG00000001626 117105737 C A . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1404 ENSG00000001626 117105737 C CA . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1405 ENSG00000001626 117105737 C CAG . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1406 ENSG00000001626 117105737 C CC . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1407 ENSG00000001626 117105737 C CCTG . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN

14148 rows

Annovar doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2814 ENSG00000001626 117105837 TT T . EXON CFTR ENST00000546407 ignored . non_coding_exon_variant ENST00000546407.1:n.1delT ignored NaN NaN NaN
2815 ENSG00000001626 117105837 TTG T . EXON CFTR ENST00000546407 ignored . non_coding_exon_variant ENST00000546407.1:n.1_2delTG ignored NaN NaN NaN
2816 ENSG00000001626 117105837 TTGA T . EXON CFTR ENST00000546407 ignored . non_coding_exon_variant ENST00000546407.1:n.1_3delTGA ignored NaN NaN NaN
2817 ENSG00000001626 117105838 T A . EXON CFTR ENST00000546407 ignored . non_coding_exon_variant ENST00000546407.1:n.1T>A ignored NaN NaN NaN
2818 ENSG00000001626 117105838 T C . EXON CFTR ENST00000546407 ignored . non_coding_exon_variant ENST00000546407.1:n.1T>C ignored NaN NaN NaN

661 rows

Annovar doesn't match for splicing_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
3460 ENSG00000001626 117105883 AAGG A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.47_48+1delAGG splicing_variant NaN NaN NaN
3473 ENSG00000001626 117105884 AGG A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48_48+1delGG splicing_variant NaN NaN NaN
3474 ENSG00000001626 117105884 AGGT A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48_48+2delGGT splicing_variant NaN NaN NaN
3486 ENSG00000001626 117105885 GG G . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48+1delG splicing_variant NaN NaN NaN
3487 ENSG00000001626 117105885 GGT G . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48+1_48+2delGT splicing_variant NaN NaN NaN

486 rows

Annovar doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
3506 ENSG00000001626 117105887 T TA . INTRON CFTR ENST00000546407 n.48+3*>+A intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insA intron_variant NaN NaN NaN
3507 ENSG00000001626 117105887 T TAC . INTRON CFTR ENST00000546407 n.48+3*>+AC intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insAC intron_variant NaN NaN NaN
3508 ENSG00000001626 117105887 T TATC . INTRON CFTR ENST00000546407 n.48+3*>+ATC intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insATC intron_variant NaN NaN NaN
3509 ENSG00000001626 117105887 T TC . INTRON CFTR ENST00000546407 n.48+3*>+C intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insC intron_variant NaN NaN NaN
3510 ENSG00000001626 117105887 T TCTA . INTRON CFTR ENST00000546407 n.48+3*>+CTA intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insCTA intron_variant NaN NaN NaN

66208 rows

Annovar doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
12941 ENSG00000001626 117119515 G GA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insA 5_prime_UTR_variant splicing_variant splicing NaN
12942 ENSG00000001626 117119515 G GAT . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insAT 5_prime_UTR_variant splicing_variant splicing NaN
12943 ENSG00000001626 117119515 G GC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insC 5_prime_UTR_variant splicing_variant splicing NaN
12944 ENSG00000001626 117119515 G GCAT . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insCAT 5_prime_UTR_variant splicing_variant splicing NaN
12945 ENSG00000001626 117119515 G GCGA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insCGA 5_prime_UTR_variant splicing_variant splicing NaN

8 rows

Annovar doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
24393 ENSG00000001626 117144306 G GA . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+A frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->A,ENST0000000308...
24394 ENSG00000001626 117144306 G GAG . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+AG frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->AG,ENST000000030...
24395 ENSG00000001626 117144306 G GAT . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+AT frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->AT,ENST000000030...
24396 ENSG00000001626 117144306 G GC . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+C frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->C,ENST0000000308...
24399 ENSG00000001626 117144306 G GG . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+G frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->G,ENST0000000308...

1337 rows

Annovar doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20862 ENSG00000001626 117120148 CATG C . CODON_DELETION CFTR ENST00000454343 p.Met1X/c.1*>-ATG inframe_variant . inframe_deletion ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... inframe_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
24397 ENSG00000001626 117144306 G GCGA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CGA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CGA,ENST00000003...
24398 ENSG00000001626 117144306 G GCTA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CTA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CTA,ENST00000003...
28750 ENSG00000001626 117149087 G GCGA . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Arg55X/c.165*>+CGA inframe_variant . inframe_insertion ENSP00000389119.1:p.Arg55delinsSerGluENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon3:c.165-1->CGA,ENST0000000...
28754 ENSG00000001626 117149087 G GTAC . CODON_CHANGE_PLUS_CODON_INSERTION CFTR ENST00000454343 p.Arg55X/c.165*>+TAC inframe_variant . inframe_insertion ENSP00000389119.1:p.Arg55delinsSerThrENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon3:c.165-1->TAC,ENST0000000...

422 rows

Annovar doesn't match for nonsynonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136292 ENSG00000001626 117267828 A G . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Gly/c.544A>G nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183GlyENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136297 ENSG00000001626 117267829 G A . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Lys/c.545G>A nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183LysENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136298 ENSG00000001626 117267829 G C . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Thr/c.545G>C nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183ThrENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136307 ENSG00000001626 117267829 G T . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Ile/c.545G>T nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183IleENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136319 ENSG00000001626 117267830 A C . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Ser/c.546A>C nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183SerENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN

273 rows

Annovar doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
21380 ENSG00000001626 117120185 TCCA T . STOP_GAINED CFTR ENST00000454343 p.X13*/c.38*>-CCA stop_gained . stop_gained ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon1:c.38_40d...
24810 ENSG00000001626 117144335 TACA T . STOP_GAINED CFTR ENST00000454343 p.X28*/c.83*>-ACA stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.83_85d...
24824 ENSG00000001626 117144336 ACAG A . STOP_GAINED CFTR ENST00000454343 p.X28*/c.84*>-CAG stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.84_86d...
25230 ENSG00000001626 117144365 TACC T . STOP_GAINED CFTR ENST00000454343 p.X38*/c.113*>-ACC stop_gained . stop_gained ENSP00000389119.1:p.Tyr38_Gln39delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.113_11...
25244 ENSG00000001626 117144366 ACCA A . STOP_GAINED CFTR ENST00000454343 p.X38*/c.114*>-CCA stop_gained . stop_gained ENSP00000389119.1:p.Tyr38_Gln39delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.114_11...

145 rows

Annovar doesn't match for synonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136269 ENSG00000001626 117267827 G A . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>A synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>A(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136270 ENSG00000001626 117267827 G C . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>C synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>C(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136279 ENSG00000001626 117267827 G T . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>T synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>T(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136291 ENSG00000001626 117267828 A C . SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Arg/c.544A>C synonymous_variant . synonymous_variant ENST00000468795.1:c.546A>C(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136320 ENSG00000001626 117267830 A G . SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Arg/c.546A>G synonymous_variant . synonymous_variant ENST00000468795.1:c.548A>G(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN

85 rows

Annovar doesn't match for downstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
25976 ENSG00000001626 117144419 T TA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->A,ENST000000030...
25977 ENSG00000001626 117144419 T TATG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->ATG,ENST0000000...
25978 ENSG00000001626 117144419 T TC . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->C,ENST000000030...
25979 ENSG00000001626 117144419 T TCGA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->CGA,ENST0000000...
25980 ENSG00000001626 117144419 T TG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->G,ENST000000030...

41993 rows

Annovar doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136604 ENSG00000001626 117267850 GTTA G . STOP_LOST CFTR ENST00000468795 p.X189Trpext*?/c.567*>-TTA stop_lost . stop_lost ENSP00000419254.1:p.CysTer190TrpENST0000046879... stop_lost ignored unknown UNKNOWN
136618 ENSG00000001626 117267851 TTAG T . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>-TAG stop_lost . stop_lost ENSP00000419254.1:p.Ter191delextTer5ENST000004... stop_lost ignored unknown UNKNOWN
136619 ENSG00000001626 117267852 T A . STOP_LOST CFTR ENST00000468795 p.*190Lysext*?/c.568T>A stop_lost . stop_lost ENSP00000419254.1:p.Ter191LysextTer6ENST000004... stop_lost ignored unknown UNKNOWN
136620 ENSG00000001626 117267852 T C . STOP_LOST CFTR ENST00000468795 p.*190Glnext*?/c.568T>C stop_lost . stop_lost ENSP00000419254.1:p.Ter191GlnextTer6ENST000004... stop_lost ignored unknown UNKNOWN
136621 ENSG00000001626 117267852 T G . STOP_LOST CFTR ENST00000468795 p.*190Gluext*?/c.568T>G stop_lost . stop_lost ENSP00000419254.1:p.Ter191GluextTer6ENST000004... stop_lost ignored unknown UNKNOWN

42 rows

Annovar doesn't match for 3_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136649 ENSG00000001626 117267854 G GA . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insA 3_prime_UTR_variant ignored unknown UNKNOWN
136650 ENSG00000001626 117267854 G GACG . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insACG 3_prime_UTR_variant ignored unknown UNKNOWN
136651 ENSG00000001626 117267854 G GC . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insC 3_prime_UTR_variant ignored unknown UNKNOWN
136652 ENSG00000001626 117267854 G GCG . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insCG 3_prime_UTR_variant ignored unknown UNKNOWN
136653 ENSG00000001626 117267854 G GG . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.572dupG 3_prime_UTR_variant ignored unknown UNKNOWN

1019 rows

Annovar doesn't match for nan

0 rows

In [51]:
sampletables = '<h1>At least 1 column doesn\'t match</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> Annovar doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_annovar"]!=effect) & 
                                  ((master_df["normalized_so_snpeff"]==effect) | (master_df["normalized_so_vep"]==effect))]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.head(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[51]:

At least 1 column doesn't match

Annovar doesn't match for intergenic_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
0 117105737 C A . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
1 117105737 C CA . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
2 117105737 C CAG . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
3 117105737 C CC . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
4 117105737 C CCTG . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN

1403 rows

Annovar doesn't match for upstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
1403 ENSG00000001626 117105737 C A . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1404 ENSG00000001626 117105737 C CA . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1405 ENSG00000001626 117105737 C CAG . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1406 ENSG00000001626 117105737 C CC . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN
1407 ENSG00000001626 117105737 C CCTG . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . upstream_gene_variant upstream_gene_variant NaN NaN NaN

14170 rows

Annovar doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2788 ENSG00000001626 117105835 TTTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2801 ENSG00000001626 117105836 TTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2802 ENSG00000001626 117105836 TTTG T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2806 ENSG00000001626 117105837 T TA . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2807 ENSG00000001626 117105837 T TAC . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN

680 rows

Annovar doesn't match for splicing_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
3460 ENSG00000001626 117105883 AAGG A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.47_48+1delAGG splicing_variant NaN NaN NaN
3473 ENSG00000001626 117105884 AGG A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48_48+1delGG splicing_variant NaN NaN NaN
3474 ENSG00000001626 117105884 AGGT A . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_donor_variant ENST00000546407.1:n.48_48+2delGGT splicing_variant NaN NaN NaN
3477 ENSG00000001626 117105885 G GA . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insA ignored NaN NaN NaN
3478 ENSG00000001626 117105885 G GC . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insC ignored NaN NaN NaN

1038 rows

Annovar doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
3506 ENSG00000001626 117105887 T TA . INTRON CFTR ENST00000546407 n.48+3*>+A intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insA intron_variant NaN NaN NaN
3507 ENSG00000001626 117105887 T TAC . INTRON CFTR ENST00000546407 n.48+3*>+AC intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insAC intron_variant NaN NaN NaN
3508 ENSG00000001626 117105887 T TATC . INTRON CFTR ENST00000546407 n.48+3*>+ATC intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insATC intron_variant NaN NaN NaN
3509 ENSG00000001626 117105887 T TC . INTRON CFTR ENST00000546407 n.48+3*>+C intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insC intron_variant NaN NaN NaN
3510 ENSG00000001626 117105887 T TCTA . INTRON CFTR ENST00000546407 n.48+3*>+CTA intron_variant . intron_variant ENST00000546407.1:n.48+2_48+3insCTA intron_variant NaN NaN NaN

66288 rows

Annovar doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
10730 ENSG00000001626 117119357 T TA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insA intron_variant upstream_gene_variant upstream NaN
10731 ENSG00000001626 117119357 T TAG . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insAG intron_variant upstream_gene_variant upstream NaN
10732 ENSG00000001626 117119357 T TC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insC intron_variant upstream_gene_variant upstream NaN
10733 ENSG00000001626 117119357 T TG . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insG intron_variant upstream_gene_variant upstream NaN
10734 ENSG00000001626 117119357 T TGC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insGC intron_variant upstream_gene_variant upstream NaN

27 rows

Annovar doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20850 ENSG00000001626 117120148 C CA . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+A frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insA 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20852 ENSG00000001626 117120148 C CC . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+C frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1dupC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20853 ENSG00000001626 117120148 C CG . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+G frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insG 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20854 ENSG00000001626 117120148 C CGC . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+GC frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20855 ENSG00000001626 117120148 C CT . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+T frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN

1823 rows

Annovar doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20851 ENSG00000001626 117120148 C CACT . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+ACT inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insACT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20857 ENSG00000001626 117120148 C CTGC . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+TGC inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20862 ENSG00000001626 117120148 CATG C . CODON_DELETION CFTR ENST00000454343 p.Met1X/c.1*>-ATG inframe_variant . inframe_deletion ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... inframe_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
24397 ENSG00000001626 117144306 G GCGA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CGA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CGA,ENST00000003...
24398 ENSG00000001626 117144306 G GCTA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CTA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CTA,ENST00000003...

424 rows

Annovar doesn't match for nonsynonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136292 ENSG00000001626 117267828 A G . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Gly/c.544A>G nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183GlyENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136297 ENSG00000001626 117267829 G A . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Lys/c.545G>A nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183LysENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136298 ENSG00000001626 117267829 G C . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Thr/c.545G>C nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183ThrENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136307 ENSG00000001626 117267829 G T . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Ile/c.545G>T nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183IleENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN
136319 ENSG00000001626 117267830 A C . NON_SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Ser/c.546A>C nonsynonymous_variant . missense_variant ENSP00000419254.1:p.Arg183SerENST00000468795.1... nonsynonymous_variant ignored unknown UNKNOWN

277 rows

Annovar doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20939 ENSG00000001626 117120154 G GT . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
20940 ENSG00000001626 117120154 G GTA . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+TA stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
21109 ENSG00000001626 117120166 G GT . STOP_GAINED CFTR ENST00000454343 p.Glu7*/c.19*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.18_19i...
21148 ENSG00000001626 117120169 A AT . STOP_GAINED CFTR ENST00000454343 p.Lys8*/c.22*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.21_22i...
21149 ENSG00000001626 117120169 A ATA . STOP_GAINED CFTR ENST00000454343 p.Lys8*/c.22*>+TA stop_gained . frameshift_variant ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.21_22i...

797 rows

Annovar doesn't match for synonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136269 ENSG00000001626 117267827 G A . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>A synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>A(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136270 ENSG00000001626 117267827 G C . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>C synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>C(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136279 ENSG00000001626 117267827 G T . SYNONYMOUS_CODING CFTR ENST00000468795 p.Val181Val/c.543G>T synonymous_variant . synonymous_variant ENST00000468795.1:c.545G>T(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136291 ENSG00000001626 117267828 A C . SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Arg/c.544A>C synonymous_variant . synonymous_variant ENST00000468795.1:c.546A>C(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN
136320 ENSG00000001626 117267830 A G . SYNONYMOUS_CODING CFTR ENST00000468795 p.Arg182Arg/c.546A>G synonymous_variant . synonymous_variant ENST00000468795.1:c.548A>G(p.%3D)ENST000004687... synonymous_variant ignored unknown UNKNOWN

88 rows

Annovar doesn't match for downstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
25976 ENSG00000001626 117144419 T TA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->A,ENST000000030...
25977 ENSG00000001626 117144419 T TATG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->ATG,ENST0000000...
25978 ENSG00000001626 117144419 T TC . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->C,ENST000000030...
25979 ENSG00000001626 117144419 T TCGA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->CGA,ENST0000000...
25980 ENSG00000001626 117144419 T TG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->G,ENST000000030...

42041 rows

Annovar doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136603 ENSG00000001626 117267850 GTT G . FRAME_SHIFT CFTR ENST00000468795 p.X189X/c.567*>-TT frameshift_variant . stop_lost ENSP00000419254.1:p.Cys190TerENST00000468795.1... stop_lost ignored unknown UNKNOWN
136604 ENSG00000001626 117267850 GTTA G . STOP_LOST CFTR ENST00000468795 p.X189Trpext*?/c.567*>-TTA stop_lost . stop_lost ENSP00000419254.1:p.CysTer190TrpENST0000046879... stop_lost ignored unknown UNKNOWN
136608 ENSG00000001626 117267851 T TA . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+A stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191IlefsTer6ENST0000046... frameshift_variant ignored unknown UNKNOWN
136609 ENSG00000001626 117267851 T TC . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+C stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191LeufsTer6ENST0000046... frameshift_variant ignored unknown UNKNOWN
136610 ENSG00000001626 117267851 T TCT . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+CT stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191LeufsTer11ENST000004... frameshift_variant ignored unknown UNKNOWN

98 rows

Annovar doesn't match for 3_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136632 ENSG00000001626 117267852 TAGA T . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.569*>-AGA stop_lost . 3_prime_UTR_variant ENST00000468795.1:c.*1_571delAGA 3_prime_UTR_variant ignored unknown UNKNOWN
136645 ENSG00000001626 117267853 AGA A . FRAME_SHIFT CFTR ENST00000468795 p.X190X/c.570*>-GA frameshift_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572delGA 3_prime_UTR_variant ignored unknown UNKNOWN
136646 ENSG00000001626 117267853 AGAC A . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.570*>-GAC stop_lost . 3_prime_UTR_variant ENST00000468795.1:c.*2_572delGAC 3_prime_UTR_variant ignored unknown UNKNOWN
136649 ENSG00000001626 117267854 G GA . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insA 3_prime_UTR_variant ignored unknown UNKNOWN
136650 ENSG00000001626 117267854 G GACG . UTR_3_PRIME CFTR ENST00000468795 3_prime_UTR_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572insACG 3_prime_UTR_variant ignored unknown UNKNOWN

1036 rows

Annovar doesn't match for nan

0 rows

In [10]:
sampletables =''
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_annovar"]==effect) & 
                                  (master_df["normalized_so_snpeff"]!=effect) & 
                                  (master_df["normalized_so_vep"]==effect)]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.tail(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[10]:

Snpeff doesn't match for intergenic_variant

0 rows

Snpeff doesn't match for upstream_gene_variant

0 rows

Snpeff doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
259692 ENSG00000232661 117204728 GAAC G . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN
259705 ENSG00000232661 117204729 AAC A . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN
259706 ENSG00000232661 117204729 AACT A . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN

3 rows

Snpeff doesn't match for splicing_variant

0 rows

Snpeff doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
187471 ENSG00000001626 117355809 C CCG . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insCG intron_variant intron_variant intronic NaN
187472 ENSG00000001626 117355809 C CG . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insG intron_variant intron_variant intronic NaN
187473 ENSG00000001626 117355809 C CGCA . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insGCA intron_variant intron_variant intronic NaN
187474 ENSG00000001626 117355809 C CGT . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insGT intron_variant intron_variant intronic NaN
187475 ENSG00000001626 117355809 C CT . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insT intron_variant intron_variant intronic NaN

208 rows

Snpeff doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20853 ENSG00000001626 117120148 C CG . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+G frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insG 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20854 ENSG00000001626 117120148 C CGC . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+GC frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20855 ENSG00000001626 117120148 C CT . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+T frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20856 ENSG00000001626 117120148 C CTG . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+TG frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTG 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20857 ENSG00000001626 117120148 C CTGC . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+TGC inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN

24 rows

Snpeff doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
160354 ENSG00000001626 117307159 T TC . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+C stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160356 ENSG00000001626 117307159 T TG . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+G stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420ValfsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160357 ENSG00000001626 117307159 T TGA . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+GA stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420AspfsTer8ENST000004... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160358 ENSG00000001626 117307159 T TT . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+T stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257d...
160359 ENSG00000001626 117307159 T TTC . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+TC stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420SerfsTer8ENST000004... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...

776 rows

Snpeff doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
150498 ENSG00000001626 117304914 T TCGA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Val1349_Thr1350insGluENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon24:c.4046_...
154782 ENSG00000001626 117305618 G GCGT . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Leu1384_Val1385insArgENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon25:c.4152_...
154785 ENSG00000001626 117305618 G GGTA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Leu1384_Val1385insValENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon25:c.4152_...
159858 ENSG00000001626 117307123 AGAG A . FRAME_SHIFT CFTR ENST00000426809 p.X1439X/c.4315*>-GAG frameshift_variant . inframe_deletion ENSP00000403677.1:p.Glu1408delENST00000454343.... inframe_variant inframe_variant nonframeshift deletion ENSG00000001626:ENST00000454343:exon26:c.4222_...
159872 ENSG00000001626 117307124 GAGG G . FRAME_SHIFT CFTR ENST00000426809 p.X1439X/c.4316*>-AGG frameshift_variant . inframe_deletion ENSP00000403677.1:p.Glu1409delENST00000454343.... inframe_variant inframe_variant nonframeshift deletion ENSG00000001626:ENST00000454343:exon26:c.4223_...

53 rows

Snpeff doesn't match for nonsynonymous_variant

0 rows

Snpeff doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
25948 ENSG00000001626 117144417 G GATG . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Arg55_Glu56insTerENST00000... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.164_16...
49816 ENSG00000001626 117176727 A AATG . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Gln260_Thr261insTerENST000... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000426809:exon6:c.779_78...
150499 ENSG00000001626 117304914 T TCTA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Val1349_Thr1350insTerENST0... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000426809:exon24:c.4046_...

3 rows

Snpeff doesn't match for synonymous_variant

0 rows

Snpeff doesn't match for downstream_gene_variant

0 rows

Snpeff doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
160360 ENSG00000001626 117307159 TT T . FRAME_SHIFT CFTR ENST00000454343 p.X1420X/c.4258*>-T frameshift_variant . stop_lost ENSP00000403677.1:p.Ter1420ArgENST00000454343.... stop_lost stop_lost stoploss SNV ENSG00000001626:ENST00000454343:exon26:c.4258d...

1 rows

Snpeff doesn't match for 3_prime_UTR_variant

0 rows

Snpeff doesn't match for nan

0 rows

In [9]:
sampletables = '<h1>Other algo\'s agree, but...</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_annovar"]==effect) & 
                                  (master_df["normalized_so_snpeff"]!=effect) & 
                                  (master_df["normalized_so_vep"]==effect)]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.tail(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[9]:

Other algo's agree, but...

Snpeff doesn't match for intergenic_variant

0 rows

Snpeff doesn't match for upstream_gene_variant

0 rows

Snpeff doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
259692 ENSG00000232661 117204728 GAAC G . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN
259705 ENSG00000232661 117204729 AAC A . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN
259706 ENSG00000232661 117204729 AACT A . UPSTREAM AC000111.3 ENST00000441019 upstream_gene_variant . non_coding_exon_variant ignored ignored ncRNA_exonic NaN

3 rows

Snpeff doesn't match for splicing_variant

0 rows

Snpeff doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
187471 ENSG00000001626 117355809 C CCG . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insCG intron_variant intron_variant intronic NaN
187472 ENSG00000001626 117355809 C CG . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insG intron_variant intron_variant intronic NaN
187473 ENSG00000001626 117355809 C CGCA . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insGCA intron_variant intron_variant intronic NaN
187474 ENSG00000001626 117355809 C CGT . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insGT intron_variant intron_variant intronic NaN
187475 ENSG00000001626 117355809 C CT . SPLICE_SITE_ACCEPTOR CFTR ENST00000600166 splicing_variant . intron_variant ENST00000610149.1:n.450-3_450-2insT intron_variant intron_variant intronic NaN

208 rows

Snpeff doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20853 ENSG00000001626 117120148 C CG . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+G frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insG 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20854 ENSG00000001626 117120148 C CGC . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+GC frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20855 ENSG00000001626 117120148 C CT . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+T frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20856 ENSG00000001626 117120148 C CTG . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+TG frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTG 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20857 ENSG00000001626 117120148 C CTGC . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+TGC inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN

24 rows

Snpeff doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
160354 ENSG00000001626 117307159 T TC . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+C stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160356 ENSG00000001626 117307159 T TG . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+G stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420ValfsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160357 ENSG00000001626 117307159 T TGA . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+GA stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420AspfsTer8ENST000004... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160358 ENSG00000001626 117307159 T TT . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+T stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420LeufsTer74ENST00000... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257d...
160359 ENSG00000001626 117307159 T TTC . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4258*>+TC stop_lost . frameshift_variant ENSP00000403677.1:p.Ter1420SerfsTer8ENST000004... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000454343:exon26:c.4257_...

776 rows

Snpeff doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
150498 ENSG00000001626 117304914 T TCGA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Val1349_Thr1350insGluENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon24:c.4046_...
154782 ENSG00000001626 117305618 G GCGT . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Leu1384_Val1385insArgENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon25:c.4152_...
154785 ENSG00000001626 117305618 G GGTA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Leu1384_Val1385insValENST0... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000426809:exon25:c.4152_...
159858 ENSG00000001626 117307123 AGAG A . FRAME_SHIFT CFTR ENST00000426809 p.X1439X/c.4315*>-GAG frameshift_variant . inframe_deletion ENSP00000403677.1:p.Glu1408delENST00000454343.... inframe_variant inframe_variant nonframeshift deletion ENSG00000001626:ENST00000454343:exon26:c.4222_...
159872 ENSG00000001626 117307124 GAGG G . FRAME_SHIFT CFTR ENST00000426809 p.X1439X/c.4316*>-AGG frameshift_variant . inframe_deletion ENSP00000403677.1:p.Glu1409delENST00000454343.... inframe_variant inframe_variant nonframeshift deletion ENSG00000001626:ENST00000454343:exon26:c.4223_...

53 rows

Snpeff doesn't match for nonsynonymous_variant

0 rows

Snpeff doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
25948 ENSG00000001626 117144417 G GATG . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Arg55_Glu56insTerENST00000... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.164_16...
49816 ENSG00000001626 117176727 A AATG . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Gln260_Thr261insTerENST000... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000426809:exon6:c.779_78...
150499 ENSG00000001626 117304914 T TCTA . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . stop_gained ENSP00000389119.1:p.Val1349_Thr1350insTerENST0... stop_gained stop_gained stopgain SNV ENSG00000001626:ENST00000426809:exon24:c.4046_...

3 rows

Snpeff doesn't match for synonymous_variant

0 rows

Snpeff doesn't match for downstream_gene_variant

0 rows

Snpeff doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
160360 ENSG00000001626 117307159 TT T . FRAME_SHIFT CFTR ENST00000454343 p.X1420X/c.4258*>-T frameshift_variant . stop_lost ENSP00000403677.1:p.Ter1420ArgENST00000454343.... stop_lost stop_lost stoploss SNV ENSG00000001626:ENST00000454343:exon26:c.4258d...

1 rows

Snpeff doesn't match for 3_prime_UTR_variant

0 rows

Snpeff doesn't match for nan

0 rows

In [54]:
sampletables = '<h1>At least 1 column doesn\'t match</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> Snpeff doesn't match for <em>" + str(effect) + "</em></h2>"
    query = num_rows = master_df.loc[(master_df["normalized_so_snpeff"]!=effect) & 
                                     ((master_df["normalized_so_annovar"]==effect) | (master_df["normalized_so_vep"]==effect))]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.head(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[54]:

At least 1 column doesn't match

Snpeff doesn't match for intergenic_variant

0 rows

Snpeff doesn't match for upstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2806 ENSG00000001626 117105837 T TA . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2807 ENSG00000001626 117105837 T TAC . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2808 ENSG00000001626 117105837 T TC . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2809 ENSG00000001626 117105837 T TG . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2810 ENSG00000001626 117105837 T TGAT . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN

1424 rows

Snpeff doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2788 ENSG00000001626 117105835 TTTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2801 ENSG00000001626 117105836 TTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2802 ENSG00000001626 117105836 TTTG T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
3477 ENSG00000001626 117105885 G GA . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insA ignored NaN NaN NaN
3478 ENSG00000001626 117105885 G GC . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insC ignored NaN NaN NaN

17901 rows

Snpeff doesn't match for splicing_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
11346 ENSG00000001626 117119401 T TA . INTRON CFTR ENST00000446805 c.-424+3*>+A intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insA intron_variant splicing_variant splicing NaN
11347 ENSG00000001626 117119401 T TAT . INTRON CFTR ENST00000446805 c.-424+3*>+AT intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insAT intron_variant splicing_variant splicing NaN
11348 ENSG00000001626 117119401 T TC . INTRON CFTR ENST00000446805 c.-424+3*>+C intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insC intron_variant splicing_variant splicing NaN
11349 ENSG00000001626 117119401 T TCGT . INTRON CFTR ENST00000446805 c.-424+3*>+CGT intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insCGT intron_variant splicing_variant splicing NaN
11350 ENSG00000001626 117119401 T TG . INTRON CFTR ENST00000446805 c.-424+3*>+G intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insG intron_variant splicing_variant splicing NaN

489 rows

Snpeff doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
6250 ENSG00000001626 117115742 T TA . SPLICE_SITE_ACCEPTOR CFTR ENST00000546407 splicing_variant . intron_variant ENST00000546407.1:n.49-3_49-2insA intron_variant NaN NaN NaN
6251 ENSG00000001626 117115742 T TC . SPLICE_SITE_ACCEPTOR CFTR ENST00000546407 splicing_variant . intron_variant ENST00000546407.1:n.49-3_49-2insC intron_variant NaN NaN NaN
6252 ENSG00000001626 117115742 T TCA . SPLICE_SITE_ACCEPTOR CFTR ENST00000546407 splicing_variant . intron_variant ENST00000546407.1:n.49-3_49-2insCA intron_variant NaN NaN NaN
6253 ENSG00000001626 117115742 T TCAG . SPLICE_SITE_ACCEPTOR CFTR ENST00000546407 splicing_variant . intron_variant ENST00000546407.1:n.49-3_49-2insCAG intron_variant NaN NaN NaN
6254 ENSG00000001626 117115742 T TCAT . SPLICE_SITE_ACCEPTOR CFTR ENST00000546407 splicing_variant . intron_variant ENST00000546407.1:n.49-3_49-2insCAT intron_variant NaN NaN NaN

18177 rows

Snpeff doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
11300 ENSG00000001626 117119397 AAGG A . SPLICE_SITE_DONOR CFTR ENST00000446805 splicing_variant . splice_donor_variant ENST00000446805.1:c.-425_-424+1delAGG splicing_variant 5_prime_UTR_variant UTR5 NaN
11313 ENSG00000001626 117119398 AGG A . SPLICE_SITE_DONOR CFTR ENST00000446805 splicing_variant . splice_donor_variant ENST00000446805.1:c.-424_-424+1delGG splicing_variant 5_prime_UTR_variant UTR5 NaN
11314 ENSG00000001626 117119398 AGGT A . SPLICE_SITE_DONOR CFTR ENST00000446805 splicing_variant . splice_donor_variant ENST00000446805.1:c.-424_-424+2delGGT splicing_variant 5_prime_UTR_variant UTR5 NaN
11317 ENSG00000001626 117119399 G GA . SPLICE_SITE_DONOR CFTR ENST00000446805 splicing_variant . 5_prime_UTR_variant ENST00000446805.1:c.-424_-424+1insA 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
11318 ENSG00000001626 117119399 G GAT . SPLICE_SITE_DONOR CFTR ENST00000446805 splicing_variant . 5_prime_UTR_variant ENST00000446805.1:c.-424_-424+1insAT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN

36 rows

Snpeff doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20862 ENSG00000001626 117120148 CATG C . CODON_DELETION CFTR ENST00000454343 p.Met1X/c.1*>-ATG inframe_variant . inframe_deletion ENSP00000389119.1:p.Met1?ENST00000426809.1:c.1... inframe_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20939 ENSG00000001626 117120154 G GT . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
20940 ENSG00000001626 117120154 G GTA . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+TA stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
21109 ENSG00000001626 117120166 G GT . STOP_GAINED CFTR ENST00000454343 p.Glu7*/c.19*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.18_19i...
21148 ENSG00000001626 117120169 A AT . STOP_GAINED CFTR ENST00000454343 p.Lys8*/c.22*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.21_22i...

1393 rows

Snpeff doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
21380 ENSG00000001626 117120185 TCCA T . STOP_GAINED CFTR ENST00000454343 p.X13*/c.38*>-CCA stop_gained . stop_gained ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon1:c.38_40d...
21594 ENSG00000001626 117120201 G GACT . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Ser18delinsArgLeuENST00000... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000003084:exon1:c.53_54i...
21597 ENSG00000001626 117120201 G GCAG . SPLICE_SITE_DONOR CFTR ENST00000454343 splicing_variant . inframe_insertion ENSP00000389119.1:p.Ser18dupENST00000426809.1:... inframe_variant inframe_variant nonframeshift insertion ENSG00000001626:ENST00000003084:exon1:c.53_54i...
24810 ENSG00000001626 117144335 TACA T . STOP_GAINED CFTR ENST00000454343 p.X28*/c.83*>-ACA stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.83_85d...
24824 ENSG00000001626 117144336 ACAG A . STOP_GAINED CFTR ENST00000454343 p.X28*/c.84*>-CAG stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.84_86d...

175 rows

Snpeff doesn't match for nonsynonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136647 ENSG00000001626 117267854 G A . SYNONYMOUS_STOP CFTR ENST00000468795 p.*190*/c.570G>A synonymous_variant . stop_retained_variant ENST00000468795.1:c.572G>A(p.%3D)ENST000004687... nonsynonymous_variant ignored unknown UNKNOWN
160391 ENSG00000001626 117307162 G A . SYNONYMOUS_STOP CFTR ENST00000454343 p.*1420*/c.4260G>A synonymous_variant . stop_retained_variant ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... nonsynonymous_variant synonymous_variant synonymous SNV ENSG00000001626:ENST00000454343:exon26:c.G4260...
188918 ENSG00000001626 117355912 A G . SYNONYMOUS_STOP CFTR ENST00000600166 p.*156*/c.467A>G synonymous_variant . stop_retained_variant ENST00000600166.1:c.469A>G(p.%3D)ENST000006001... nonsynonymous_variant ignored unknown UNKNOWN
188932 ENSG00000001626 117355913 A G . SYNONYMOUS_STOP CFTR ENST00000600166 p.*156*/c.468A>G synonymous_variant . stop_retained_variant ENST00000600166.1:c.470A>G(p.%3D)ENST000006001... nonsynonymous_variant ignored unknown UNKNOWN

4 rows

Snpeff doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
24430 ENSG00000001626 117144308 TG T . FRAME_SHIFT CFTR ENST00000454343 p.X19X/c.56*>-G frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp19TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.56delG...
24444 ENSG00000001626 117144309 GG G . FRAME_SHIFT CFTR ENST00000454343 p.X19X/c.57*>-G frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp19TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.57delG...
24626 ENSG00000001626 117144322 TT T . FRAME_SHIFT CFTR ENST00000454343 p.X24X/c.70*>-T frameshift_variant . frameshift_variant ENSP00000389119.1:p.Leu24TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.70delT...
24640 ENSG00000001626 117144323 TT T . FRAME_SHIFT CFTR ENST00000454343 p.X24X/c.71*>-T frameshift_variant . frameshift_variant ENSP00000389119.1:p.Leu24TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.71delT...
24822 ENSG00000001626 117144336 AC A . FRAME_SHIFT CFTR ENST00000454343 p.X28X/c.84*>-C frameshift_variant . frameshift_variant ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.84delC...

214 rows

Snpeff doesn't match for synonymous_variant

0 rows

Snpeff doesn't match for downstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
28718 ENSG00000001626 117149085 C CA . SPLICE_SITE_ACCEPTOR CFTR ENST00000454343 splicing_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
28719 ENSG00000001626 117149085 C CAC . SPLICE_SITE_ACCEPTOR CFTR ENST00000454343 splicing_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
28720 ENSG00000001626 117149085 C CATG . SPLICE_SITE_ACCEPTOR CFTR ENST00000454343 splicing_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
28721 ENSG00000001626 117149085 C CC . SPLICE_SITE_ACCEPTOR CFTR ENST00000454343 splicing_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
28722 ENSG00000001626 117149085 C CG . SPLICE_SITE_ACCEPTOR CFTR ENST00000454343 splicing_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN

48 rows

Snpeff doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136603 ENSG00000001626 117267850 GTT G . FRAME_SHIFT CFTR ENST00000468795 p.X189X/c.567*>-TT frameshift_variant . stop_lost ENSP00000419254.1:p.Cys190TerENST00000468795.1... stop_lost ignored unknown UNKNOWN
136616 ENSG00000001626 117267851 TT T . FRAME_SHIFT CFTR ENST00000468795 p.X190X/c.568*>-T frameshift_variant . stop_lost ENSP00000419254.1:p.Ter191ArgENST00000468795.1... stop_lost ignored unknown UNKNOWN
136617 ENSG00000001626 117267851 TTA T . FRAME_SHIFT CFTR ENST00000468795 p.X190X/c.568*>-TA frameshift_variant . stop_lost ENSP00000419254.1:p.Ter191AspENST00000468795.1... stop_lost ignored unknown UNKNOWN
136622 ENSG00000001626 117267852 T TA . FRAME_SHIFT CFTR ENST00000468795 p.*190X/c.569*>+A frameshift_variant . stop_lost ENSP00000419254.1:p.Ter191TerENST00000468795.1... stop_lost ignored unknown UNKNOWN
136626 ENSG00000001626 117267852 T TG . FRAME_SHIFT CFTR ENST00000468795 p.*190X/c.569*>+G frameshift_variant . stop_lost ENSP00000419254.1:p.Ter191TerENST00000468795.1... stop_lost ignored unknown UNKNOWN

33 rows

Snpeff doesn't match for 3_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136632 ENSG00000001626 117267852 TAGA T . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.569*>-AGA stop_lost . 3_prime_UTR_variant ENST00000468795.1:c.*1_571delAGA 3_prime_UTR_variant ignored unknown UNKNOWN
136645 ENSG00000001626 117267853 AGA A . FRAME_SHIFT CFTR ENST00000468795 p.X190X/c.570*>-GA frameshift_variant . 3_prime_UTR_variant ENST00000468795.1:c.*1_572delGA 3_prime_UTR_variant ignored unknown UNKNOWN
136646 ENSG00000001626 117267853 AGAC A . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.570*>-GAC stop_lost . 3_prime_UTR_variant ENST00000468795.1:c.*2_572delGAC 3_prime_UTR_variant ignored unknown UNKNOWN
160376 ENSG00000001626 117307160 TAGA T . STOP_LOST CFTR ENST00000454343 p.*1420Xext*?/c.4259*>-AGA stop_lost . 3_prime_UTR_variant ENST00000454343.1:c.*1_4259delAGA 3_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000454343:exon26:c.4259_...
160389 ENSG00000001626 117307161 AGA A . FRAME_SHIFT CFTR ENST00000454343 p.X1420X/c.4260*>-GA frameshift_variant . 3_prime_UTR_variant ENST00000454343.1:c.*1_4260delGA 3_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000454343:exon26:c.4260_...

35 rows

Snpeff doesn't match for nan

0 rows

In [55]:
sampletables = '<h1>Other algo\'s agree, but...</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> VEP doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_annovar"]==effect) & 
                                  (master_df["normalized_so_snpeff"]==effect) & 
                                  (master_df["normalized_so_vep"]!=effect)]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.head(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[55]:

Other algo's agree, but...

VEP doesn't match for intergenic_variant

0 rows

VEP doesn't match for upstream_gene_variant

0 rows

VEP doesn't match for ignored

0 rows

VEP doesn't match for splicing_variant

0 rows

VEP doesn't match for intron_variant

0 rows

VEP doesn't match for 5_prime_UTR_variant

0 rows

VEP doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20834 ENSG00000001626 117120146 ACCA A . FRAME_SHIFT CFTR ENST00000426809 p.X1X/c.1*>-CCA frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20847 ENSG00000001626 117120147 CCA C . FRAME_SHIFT CFTR ENST00000454343 p.X1X/c.1*>-CA frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20848 ENSG00000001626 117120147 CCAT C . FRAME_SHIFT CFTR ENST00000426809 p.X1X/c.1*>-CAT frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
160347 ENSG00000001626 117307158 TTT T . FRAME_SHIFT CFTR ENST00000454343 p.X1419X/c.4257*>-TT frameshift_variant . stop_lost ENSP00000403677.1:p.Ter1420GluENST00000454343.... stop_lost frameshift_variant frameshift deletion ENSG00000001626:ENST00000454343:exon26:c.4257_...
160361 ENSG00000001626 117307159 TTA T . FRAME_SHIFT CFTR ENST00000454343 p.X1420X/c.4258*>-TA frameshift_variant . stop_lost ENSP00000403677.1:p.Ter1420GluENST00000454343.... stop_lost frameshift_variant frameshift deletion ENSG00000001626:ENST00000454343:exon26:c.4258_...

12 rows

VEP doesn't match for inframe_variant

0 rows

VEP doesn't match for nonsynonymous_variant

0 rows

VEP doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
24422 ENSG00000001626 117144308 T TA . STOP_GAINED CFTR ENST00000454343 p.Trp19*/c.56*>+A stop_gained . frameshift_variant ENSP00000389119.1:p.Trp19TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.55_56i...
24423 ENSG00000001626 117144308 T TAG . STOP_GAINED CFTR ENST00000454343 p.Trp19*/c.56*>+AG stop_gained . frameshift_variant ENSP00000389119.1:p.Trp19TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.55_56i...
24435 ENSG00000001626 117144309 G GA . STOP_GAINED CFTR ENST00000454343 p.Trp19*/c.57*>+A stop_gained . frameshift_variant ENSP00000389119.1:p.Trp19TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.56_57i...
24800 ENSG00000001626 117144335 T TA . STOP_GAINED CFTR ENST00000454343 p.Tyr28*/c.83*>+A stop_gained . frameshift_variant ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.83dupA...
24805 ENSG00000001626 117144335 T TG . STOP_GAINED CFTR ENST00000454343 p.Tyr28*/c.83*>+G stop_gained . frameshift_variant ENSP00000389119.1:p.Tyr28TerENST00000426809.1:... frameshift_variant stop_gained stopgain SNV ENSG00000001626:ENST00000003084:exon2:c.82_83i...

399 rows

VEP doesn't match for synonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
160391 ENSG00000001626 117307162 G A . SYNONYMOUS_STOP CFTR ENST00000454343 p.*1420*/c.4260G>A synonymous_variant . stop_retained_variant ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... nonsynonymous_variant synonymous_variant synonymous SNV ENSG00000001626:ENST00000454343:exon26:c.G4260...

1 rows

VEP doesn't match for downstream_gene_variant

0 rows

VEP doesn't match for stop_lost

0 rows

VEP doesn't match for 3_prime_UTR_variant

0 rows

VEP doesn't match for nan

0 rows

In [56]:
sampletables = '<h1>At least 1 column doesn\'t match</h1>'
for effect in master_df["normalized_so_snpeff"].unique():
    sampletables += "<h2> VEP doesn't match for <em>" + str(effect) + "</em></h2>"
    query = master_df.loc[(master_df["normalized_so_vep"] != effect) &
                          ((master_df["normalized_so_annovar"]==effect) | (master_df["normalized_so_snpeff"]==effect))]
    num_rows = query.count()[0]
    if num_rows > 0:
        sampletables += query.head(5).to_html()
    sampletables += "<p>" + str(num_rows) + " rows</p>"
HTML(sampletables)
Out[56]:

At least 1 column doesn't match

VEP doesn't match for intergenic_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
0 117105737 C A . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
1 117105737 C CA . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
2 117105737 C CAG . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
3 117105737 C CC . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN
4 117105737 C CCTG . INTERGENIC intergenic_variant NaN NaN NaN NaN NaN NaN NaN

1403 rows

VEP doesn't match for upstream_gene_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2788 ENSG00000001626 117105835 TTTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2801 ENSG00000001626 117105836 TTT T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
2802 ENSG00000001626 117105836 TTTG T . UPSTREAM CFTR ENST00000546407 upstream_gene_variant . non_coding_exon_variant ignored NaN NaN NaN
9327 ENSG00000001626 117119257 A AA . INTRON CFTR ENST00000546407 n.166+3396*>+A intron_variant . intron_variant ENST00000546407.1:n.166+3395dupA intron_variant upstream_gene_variant upstream NaN
9328 ENSG00000001626 117119257 A AAC . INTRON CFTR ENST00000546407 n.166+3396*>+AC intron_variant . intron_variant ENST00000546407.1:n.166+3395_166+3396insAC intron_variant upstream_gene_variant upstream NaN

1414 rows

VEP doesn't match for ignored

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
2806 ENSG00000001626 117105837 T TA . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2807 ENSG00000001626 117105837 T TAC . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2808 ENSG00000001626 117105837 T TC . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2809 ENSG00000001626 117105837 T TG . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN
2810 ENSG00000001626 117105837 T TGAT . EXON CFTR ENST00000546407 ignored . upstream_gene_variant upstream_gene_variant NaN NaN NaN

17895 rows

VEP doesn't match for splicing_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
3477 ENSG00000001626 117105885 G GA . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insA ignored NaN NaN NaN
3478 ENSG00000001626 117105885 G GC . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insC ignored NaN NaN NaN
3479 ENSG00000001626 117105885 G GCGA . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insCGA ignored NaN NaN NaN
3480 ENSG00000001626 117105885 G GG . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48dupG ignored NaN NaN NaN
3481 ENSG00000001626 117105885 G GGA . SPLICE_SITE_DONOR CFTR ENST00000546407 splicing_variant . splice_region_variant ENST00000546407.1:n.48_48+1insGA ignored NaN NaN NaN

1041 rows

VEP doesn't match for intron_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
25984 ENSG00000001626 117144419 TA T . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
25985 ENSG00000001626 117144419 TAT T . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
25986 ENSG00000001626 117144419 TATG T . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
25987 ENSG00000001626 117144420 A AA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN
25988 ENSG00000001626 117144420 A AC . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant intron_variant intronic NaN

17889 rows

VEP doesn't match for 5_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
10730 ENSG00000001626 117119357 T TA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insA intron_variant upstream_gene_variant upstream NaN
10731 ENSG00000001626 117119357 T TAG . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insAG intron_variant upstream_gene_variant upstream NaN
10732 ENSG00000001626 117119357 T TC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insC intron_variant upstream_gene_variant upstream NaN
10733 ENSG00000001626 117119357 T TG . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insG intron_variant upstream_gene_variant upstream NaN
10734 ENSG00000001626 117119357 T TGC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . intron_variant ENST00000546407.1:n.166+3495_166+3496insGC intron_variant upstream_gene_variant upstream NaN

25 rows

VEP doesn't match for frameshift_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20834 ENSG00000001626 117120146 ACCA A . FRAME_SHIFT CFTR ENST00000426809 p.X1X/c.1*>-CCA frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20847 ENSG00000001626 117120147 CCA C . FRAME_SHIFT CFTR ENST00000454343 p.X1X/c.1*>-CA frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20848 ENSG00000001626 117120147 CCAT C . FRAME_SHIFT CFTR ENST00000426809 p.X1X/c.1*>-CAT frameshift_variant . 5_prime_UTR_variant 5_prime_UTR_variant frameshift_variant frameshift deletion ENSG00000001626:ENST00000426809:wholegene,ENSG...
20850 ENSG00000001626 117120148 C CA . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+A frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insA 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20852 ENSG00000001626 117120148 C CC . FRAME_SHIFT CFTR ENST00000454343 p.Met1X/c.1*>+C frameshift_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1dupC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN

217 rows

VEP doesn't match for inframe_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20851 ENSG00000001626 117120148 C CACT . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+ACT inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insACT 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
20857 ENSG00000001626 117120148 C CTGC . CODON_INSERTION CFTR ENST00000454343 p.Met1X/c.1*>+TGC inframe_variant . 5_prime_UTR_variant ENST00000454343.1:c.-1_1insTGC 5_prime_UTR_variant 5_prime_UTR_variant UTR5 NaN
21380 ENSG00000001626 117120185 TCCA T . STOP_GAINED CFTR ENST00000454343 p.X13*/c.38*>-CCA stop_gained . stop_gained ENSP00000389119.1:p.Ser13_Lys14delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon1:c.38_40d...
24810 ENSG00000001626 117144335 TACA T . STOP_GAINED CFTR ENST00000454343 p.X28*/c.83*>-ACA stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.83_85d...
24824 ENSG00000001626 117144336 ACAG A . STOP_GAINED CFTR ENST00000454343 p.X28*/c.84*>-CAG stop_gained . stop_gained ENSP00000389119.1:p.Tyr28_Arg29delinsTerENST00... stop_gained inframe_variant nonframeshift deletion ENSG00000001626:ENST00000003084:exon2:c.84_86d...

124 rows

VEP doesn't match for nonsynonymous_variant

0 rows

VEP doesn't match for stop_gained

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
20939 ENSG00000001626 117120154 G GT . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
20940 ENSG00000001626 117120154 G GTA . STOP_GAINED CFTR ENST00000454343 p.Arg3*/c.7*>+TA stop_gained . frameshift_variant ENSP00000389119.1:p.Arg3TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.6_7ins...
21109 ENSG00000001626 117120166 G GT . STOP_GAINED CFTR ENST00000454343 p.Glu7*/c.19*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Glu7TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.18_19i...
21148 ENSG00000001626 117120169 A AT . STOP_GAINED CFTR ENST00000454343 p.Lys8*/c.22*>+T stop_gained . frameshift_variant ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.21_22i...
21149 ENSG00000001626 117120169 A ATA . STOP_GAINED CFTR ENST00000454343 p.Lys8*/c.22*>+TA stop_gained . frameshift_variant ENSP00000389119.1:p.Lys8TerENST00000426809.1:c... frameshift_variant frameshift_variant frameshift insertion ENSG00000001626:ENST00000003084:exon1:c.21_22i...

1262 rows

VEP doesn't match for synonymous_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136647 ENSG00000001626 117267854 G A . SYNONYMOUS_STOP CFTR ENST00000468795 p.*190*/c.570G>A synonymous_variant . stop_retained_variant ENST00000468795.1:c.572G>A(p.%3D)ENST000004687... nonsynonymous_variant ignored unknown UNKNOWN
160391 ENSG00000001626 117307162 G A . SYNONYMOUS_STOP CFTR ENST00000454343 p.*1420*/c.4260G>A synonymous_variant . stop_retained_variant ENST00000454343.1:c.4260G>A(p.%3D)ENST00000454... nonsynonymous_variant synonymous_variant synonymous SNV ENSG00000001626:ENST00000454343:exon26:c.G4260...
188918 ENSG00000001626 117355912 A G . SYNONYMOUS_STOP CFTR ENST00000600166 p.*156*/c.467A>G synonymous_variant . stop_retained_variant ENST00000600166.1:c.469A>G(p.%3D)ENST000006001... nonsynonymous_variant ignored unknown UNKNOWN
188932 ENSG00000001626 117355913 A G . SYNONYMOUS_STOP CFTR ENST00000600166 p.*156*/c.468A>G synonymous_variant . stop_retained_variant ENST00000600166.1:c.470A>G(p.%3D)ENST000006001... nonsynonymous_variant ignored unknown UNKNOWN

4 rows

VEP doesn't match for downstream_gene_variant

0 rows

VEP doesn't match for stop_lost

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
136608 ENSG00000001626 117267851 T TA . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+A stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191IlefsTer6ENST0000046... frameshift_variant ignored unknown UNKNOWN
136609 ENSG00000001626 117267851 T TC . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+C stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191LeufsTer6ENST0000046... frameshift_variant ignored unknown UNKNOWN
136610 ENSG00000001626 117267851 T TCT . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+CT stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191LeufsTer11ENST000004... frameshift_variant ignored unknown UNKNOWN
136611 ENSG00000001626 117267851 T TG . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+G stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191ValfsTer6ENST0000046... frameshift_variant ignored unknown UNKNOWN
136612 ENSG00000001626 117267851 T TGA . STOP_LOST CFTR ENST00000468795 p.*190Xext*?/c.568*>+GA stop_lost . frameshift_variant ENSP00000419254.1:p.Ter191AspfsTer11ENST000004... frameshift_variant ignored unknown UNKNOWN

24 rows

VEP doesn't match for 3_prime_UTR_variant

Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
176140 ENSG00000001626 117350702 TGAT T . INTRON CFTR ENST00000600166 c.367-5109*>-GAT intron_variant . intron_variant ENST00000429014.1:n.210-5109_210-5107delGAT intron_variant 3_prime_UTR_variant UTR3 NaN
176153 ENSG00000001626 117350703 GAT G . INTRON CFTR ENST00000600166 c.367-5108*>-AT intron_variant . intron_variant ENST00000429014.1:n.210-5108_210-5107delAT intron_variant 3_prime_UTR_variant UTR3 NaN
176154 ENSG00000001626 117350703 GATT G . INTRON CFTR ENST00000600166 c.367-5108*>-ATT intron_variant . intron_variant ENST00000429014.1:n.210-5108_210-5106delATT intron_variant 3_prime_UTR_variant UTR3 NaN
176167 ENSG00000001626 117350704 ATT A . INTRON CFTR ENST00000600166 c.367-5107*>-TT intron_variant . intron_variant ENST00000429014.1:n.210-5107_210-5106delTT intron_variant 3_prime_UTR_variant UTR3 NaN
176168 ENSG00000001626 117350704 ATTT A . INTRON CFTR ENST00000600166 c.367-5107*>-TTT intron_variant . intron_variant ENST00000429014.1:n.210-5107_210-5105delTTT intron_variant 3_prime_UTR_variant UTR3 NaN

34 rows

VEP doesn't match for nan

0 rows

In [57]:
master_df.loc[(master_df["normalized_so_annovar"]=="splicing_variant") &
              (master_df["normalized_so_snpeff"]!="splicing_variant") &
              (master_df["normalized_so_vep"]!="splicing_variant")].head(50)
Out[57]:
Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
11346 ENSG00000001626 117119401 T TA . INTRON CFTR ENST00000446805 c.-424+3*>+A intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insA intron_variant splicing_variant splicing NaN
11347 ENSG00000001626 117119401 T TAT . INTRON CFTR ENST00000446805 c.-424+3*>+AT intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insAT intron_variant splicing_variant splicing NaN
11348 ENSG00000001626 117119401 T TC . INTRON CFTR ENST00000446805 c.-424+3*>+C intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insC intron_variant splicing_variant splicing NaN
11349 ENSG00000001626 117119401 T TCGT . INTRON CFTR ENST00000446805 c.-424+3*>+CGT intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insCGT intron_variant splicing_variant splicing NaN
11350 ENSG00000001626 117119401 T TG . INTRON CFTR ENST00000446805 c.-424+3*>+G intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insG intron_variant splicing_variant splicing NaN
11351 ENSG00000001626 117119401 T TGCT . INTRON CFTR ENST00000446805 c.-424+3*>+GCT intron_variant . intron_variant ENST00000446805.1:c.-424+2_-424+3insGCT intron_variant splicing_variant splicing NaN
11352 ENSG00000001626 117119401 T TGT . INTRON CFTR ENST00000446805 c.-424+3*>+GT intron_variant . intron_variant ENST00000446805.1:c.-424+1_-424+2dupGT intron_variant splicing_variant splicing NaN
11353 ENSG00000001626 117119401 T TT . INTRON CFTR ENST00000446805 c.-424+3*>+T intron_variant . intron_variant ENST00000446805.1:c.-424+2dupT intron_variant splicing_variant splicing NaN
12941 ENSG00000001626 117119515 G GA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insA 5_prime_UTR_variant splicing_variant splicing NaN
12942 ENSG00000001626 117119515 G GAT . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insAT 5_prime_UTR_variant splicing_variant splicing NaN
12943 ENSG00000001626 117119515 G GC . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insC 5_prime_UTR_variant splicing_variant splicing NaN
12944 ENSG00000001626 117119515 G GCAT . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insCAT 5_prime_UTR_variant splicing_variant splicing NaN
12945 ENSG00000001626 117119515 G GCGA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insCGA 5_prime_UTR_variant splicing_variant splicing NaN
12946 ENSG00000001626 117119515 G GG . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1dupG 5_prime_UTR_variant splicing_variant splicing NaN
12947 ENSG00000001626 117119515 G GGA . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insGA 5_prime_UTR_variant splicing_variant splicing NaN
12948 ENSG00000001626 117119515 G GT . UTR_5_PRIME CFTR ENST00000446805 5_prime_UTR_variant . 5_prime_UTR_variant ENST00000446805.1:c.-423-1_-423insT 5_prime_UTR_variant splicing_variant splicing NaN
16232 ENSG00000001626 117119750 T TA . INTRON CFTR ENST00000446805 c.-191+3*>+A intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insA intron_variant splicing_variant splicing NaN
16233 ENSG00000001626 117119750 T TC . INTRON CFTR ENST00000446805 c.-191+3*>+C intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insC intron_variant splicing_variant splicing NaN
16234 ENSG00000001626 117119750 T TCA . INTRON CFTR ENST00000446805 c.-191+3*>+CA intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insCA intron_variant splicing_variant splicing NaN
16235 ENSG00000001626 117119750 T TCTA . INTRON CFTR ENST00000446805 c.-191+3*>+CTA intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insCTA intron_variant splicing_variant splicing NaN
16236 ENSG00000001626 117119750 T TG . INTRON CFTR ENST00000446805 c.-191+3*>+G intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insG intron_variant splicing_variant splicing NaN
16237 ENSG00000001626 117119750 T TGC . INTRON CFTR ENST00000446805 c.-191+3*>+GC intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insGC intron_variant splicing_variant splicing NaN
16238 ENSG00000001626 117119750 T TT . INTRON CFTR ENST00000446805 c.-191+3*>+T intron_variant . intron_variant ENST00000446805.1:c.-191+2dupT intron_variant splicing_variant splicing NaN
16239 ENSG00000001626 117119750 T TTCG . INTRON CFTR ENST00000446805 c.-191+3*>+TCG intron_variant . intron_variant ENST00000446805.1:c.-191+2_-191+3insTCG intron_variant splicing_variant splicing NaN
21622 ENSG00000001626 117120203 T TA . INTRON CFTR ENST00000446805 c.-191+456*>+A intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insA intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->A,ENST0000000308...
21623 ENSG00000001626 117120203 T TAC . INTRON CFTR ENST00000446805 c.-191+456*>+AC intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insAC intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->AC,ENST000000030...
21624 ENSG00000001626 117120203 T TAGC . INTRON CFTR ENST00000446805 c.-191+456*>+AGC intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insAGC intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->AGC,ENST00000003...
21625 ENSG00000001626 117120203 T TC . INTRON CFTR ENST00000446805 c.-191+456*>+C intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insC intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->C,ENST0000000308...
21626 ENSG00000001626 117120203 T TG . INTRON CFTR ENST00000446805 c.-191+456*>+G intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insG intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->G,ENST0000000308...
21627 ENSG00000001626 117120203 T TGC . INTRON CFTR ENST00000446805 c.-191+456*>+GC intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insGC intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->GC,ENST000000030...
21628 ENSG00000001626 117120203 T TGCT . INTRON CFTR ENST00000446805 c.-191+456*>+GCT intron_variant . intron_variant ENST00000446805.1:c.-191+455_-191+456insGCT intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->GCT,ENST00000003...
21629 ENSG00000001626 117120203 T TT . INTRON CFTR ENST00000446805 c.-191+456*>+T intron_variant . intron_variant ENST00000446805.1:c.-191+455dupT intron_variant splicing_variant splicing ENST00000454343:exon1:c.53+2->T,ENST0000000308...
24393 ENSG00000001626 117144306 G GA . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+A frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->A,ENST0000000308...
24394 ENSG00000001626 117144306 G GAG . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+AG frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->AG,ENST000000030...
24395 ENSG00000001626 117144306 G GAT . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+AT frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer8ENST00000426... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->AT,ENST000000030...
24396 ENSG00000001626 117144306 G GC . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+C frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->C,ENST0000000308...
24397 ENSG00000001626 117144306 G GCGA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CGA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insAspENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CGA,ENST00000003...
24398 ENSG00000001626 117144306 G GCTA . CODON_INSERTION CFTR ENST00000454343 p.Ser18X/c.54*>+CTA inframe_variant . inframe_insertion ENSP00000389119.1:p.Ser18_Trp19insTyrENST00000... inframe_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->CTA,ENST00000003...
24399 ENSG00000001626 117144306 G GG . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+G frameshift_variant . frameshift_variant ENSP00000389119.1:p.Ser18ArgfsTer27ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->G,ENST0000000308...
24400 ENSG00000001626 117144306 G GT . FRAME_SHIFT CFTR ENST00000454343 p.Ser18X/c.54*>+T frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp19LeufsTer26ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon2:c.54-1->T,ENST0000000308...
25976 ENSG00000001626 117144419 T TA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->A,ENST000000030...
25977 ENSG00000001626 117144419 T TATG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->ATG,ENST0000000...
25978 ENSG00000001626 117144419 T TC . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->C,ENST000000030...
25979 ENSG00000001626 117144419 T TCGA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->CGA,ENST0000000...
25980 ENSG00000001626 117144419 T TG . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->G,ENST000000030...
25981 ENSG00000001626 117144419 T TGC . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->GC,ENST00000003...
25982 ENSG00000001626 117144419 T TT . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->T,ENST000000030...
25983 ENSG00000001626 117144419 T TTA . DOWNSTREAM CFTR ENST00000546407 downstream_gene_variant . downstream_gene_variant downstream_gene_variant splicing_variant splicing ENST00000454343:exon2:c.164+2->TA,ENST00000003...
28747 ENSG00000001626 117149087 G GA . FRAME_SHIFT CFTR ENST00000454343 p.Arg55X/c.165*>+A frameshift_variant . frameshift_variant ENSP00000389119.1:p.Glu56ArgfsTer4ENST00000426... frameshift_variant splicing_variant splicing ENST00000454343:exon3:c.165-1->A,ENST000000030...
28748 ENSG00000001626 117149087 G GAG . FRAME_SHIFT CFTR ENST00000454343 p.Arg55X/c.165*>+AG frameshift_variant . frameshift_variant ENSP00000389119.1:p.Trp57AsnfsTer35ENST0000042... frameshift_variant splicing_variant splicing ENST00000454343:exon3:c.165-1->AG,ENST00000003...

50 rows × 17 columns

In [58]:
master_df.loc[(master_df["normalized_so_annovar"]=="splicing_variant") &
              (master_df["normalized_so_snpeff"]!="splicing_variant")].tail()
Out[58]:
Gene POS REF ALT ID_x Effect Gene_Name Transcript_ID hgvs_snpeff normalized_so_snpeff ID_y Consequence hgvs_vep normalized_so_vep normalized_so_annovar combined_effect hgvs
249128 ENSG00000083622 117282491 G GG . INTRON AC000111.6 ENST00000456270 n.65+4913*>+C intron_variant . intron_variant ENST00000456270.1:n.65+4913dupC intron_variant splicing_variant splicing ENST00000454343:exon22:c.3535-1->G,ENST0000000...
249129 ENSG00000083622 117282491 G GGA . INTRON AC000111.6 ENST00000456270 n.65+4913*>+CT intron_variant . intron_variant ENST00000456270.1:n.65+4913_65+4914insTC intron_variant splicing_variant splicing ENST00000454343:exon22:c.3535-1->GA,ENST000000...
249131 ENSG00000083622 117282491 G GGT . INTRON AC000111.6 ENST00000456270 n.65+4913*>+CA intron_variant . intron_variant ENST00000456270.1:n.65+4912_65+4913dupAC intron_variant splicing_variant splicing ENST00000454343:exon22:c.3535-1->GT,ENST000000...
249132 ENSG00000083622 117282491 G GT . INTRON AC000111.6 ENST00000456270 n.65+4913*>+A intron_variant . intron_variant ENST00000456270.1:n.65+4913_65+4914insA intron_variant splicing_variant splicing ENST00000454343:exon22:c.3535-1->T,ENST0000000...
249133 ENSG00000083622 117282491 G T . INTRON AC000111.6 ENST00000456270 n.65+4914C>A intron_variant . intron_variant ENST00000456270.1:n.65+4914C>A intron_variant splicing_variant splicing ENST00000454343:exon22:c.3535-1G>T,ENST0000000...

5 rows × 17 columns