#!/usr/bin/env python
# coding: utf-8

# # Annotate Putative Somatic Variants with CIViC and VEP

# Welcome back to OpenCAP Jupyter Notebooks! Here you can run pre-designed code to generate an output report for your variants. 
# 
# To run a Jupyter notebook cell, hold down "Shift" and press "Enter".

# ## Commands to annotate variants

# We have created a Python script, identified_variants_to_annotation.py, that takes in variants from the sequencing pipeline and outputs a document with annotation information. The output includes variant effect predictions and clinical interpretations of variants in cancer.
# 
# Before running this cell, please upload your somatic variants to the home directory (see ReadTheDocs) and change the input variant list name to match your somatic variant list. You can run the cell by holding down the "Shift" key and pressing "Enter".

# In[ ]:


# Invoke IPython's `run` line magic on the annotation script.
# Quoted arguments, in order: the script to run, the input variant list (TSV),
# and a third value that is presumably the sample name -- confirm against the script.
get_ipython().run_line_magic('run', "-i 'identified_variants_to_annotation.py' 'test_annotate_variants.tsv' 'Sample_name'")


# In[6]:


# In[50]:


from docx import Document
from docx.shared import Inches
import pandas as pd
import datetime
import solvebio
import myvariant
import utils


# In[64]:


# Sample label shown in the report, and the tab-separated variant table to annotate.
sample_name = 'SCLC_5'
somatic_variants = pd.read_csv(
    'test_annotate_variants.tsv',
    sep='\t',
)


# In[83]:


# MyVariant.info client. `mv` is used by the lookups in this notebook but is not
# assigned anywhere else in this file, so create it explicitly here.
mv = myvariant.MyVariantInfo()

# Example lookup: show the HGVS expressions CIViC records for one variant.
mv.getvariant('chr7:g.140453136A>T')['civic']['hgvs_expressions']


# In[ ]:


# Position of each evidence type inside the per-sample count list.
_EVIDENCE_TYPE_SLOTS = {
    'Predictive': 0,
    'Prognostic': 1,
    'Diagnostic': 2,
    'Predisposing': 3,
}

# drug_interaction_type -> (text placed before the drug list, word joining the last drug).
_DRUG_INTERACTION_PHRASES = {
    'Combination': ('combination of ', 'and'),
    'Substitutes': ('', 'or'),
    'Sequential': ('sequence of ', 'and'),
}


def _join_drug_names(names, conjunction):
    """Join names as 'A, B <conjunction> C'; a single name is returned on its own."""
    if len(names) < 2:
        return ''.join(names)
    return ', '.join(names[:-1]) + ' ' + conjunction + ' ' + names[-1]


def _predictive_statement(initial, final, record):
    """Return the statement for a Predictive record, or None for an unhandled interaction type."""
    interaction = record['drug_interaction_type']
    if not interaction:
        # No interaction type: only the first listed drug is reported.
        drug_text = record['drugs'][0]['name']
    elif interaction in _DRUG_INTERACTION_PHRASES:
        prefix, conjunction = _DRUG_INTERACTION_PHRASES[interaction]
        names = [str(drug['name']) for drug in record['drugs']]
        drug_text = prefix + _join_drug_names(names, conjunction)
    else:
        return None
    return initial + ' to ' + drug_text + ' for patients with ' + record['disease']['name'] + final


def get_evidence_statements(variant_ids, sample):
    """Build readable evidence statements and count them by evidence type.

    Records whose ``evidence_level`` is 'D' or 'E' are skipped: they produce
    no statement and are not counted.

    NOTE(review): the body reads ``current_variant``, ``gene`` and ``variant``
    as free (module-level) names. None of them is assigned inside this
    function and the loop variable over ``variant_ids`` is never used, so the
    same ``current_variant`` is processed once per element of ``variant_ids``.
    Presumably a per-id lookup was intended here -- confirm and wire it in.

    Args:
        variant_ids: Iterable; the evidence loop runs once per element.
        sample: Key under which the evidence counts are stored.

    Returns:
        A tuple ``(evidence_statements, sample_evidence_count)``:
        ``evidence_statements`` is a list of strings and
        ``sample_evidence_count`` is ``{sample: [predictive, prognostic,
        diagnostic, predisposing]}``.
    """
    evidence_statements = []
    sample_evidence_count = {sample: [0, 0, 0, 0]}

    for _ in variant_ids:
        for evidence in current_variant['evidence_items']:
            # Each entry is indexed at [0] to reach the evidence record itself.
            record = evidence[0]

            initial = (str(gene) + ' ' + str(variant) + ' '
                       + record['evidence_direction'] + ' '
                       + record['clinical_significance'])
            # str() keeps numeric PubMed IDs from raising TypeError on concatenation.
            final = ('(CIViC ' + record['name'] + '- PMID:'
                     + str(record['source']['pubmed_id']) + ')')

            if record['evidence_level'] in ('D', 'E'):
                continue

            evidence_type = record['evidence_type']
            if evidence_type == 'Predictive':
                statement = _predictive_statement(initial, final, record)
                if statement is not None:
                    evidence_statements.append(statement)
            elif evidence_type in ('Prognostic', 'Diagnostic'):
                evidence_statements.append(
                    initial + ' for patients with ' + record['disease']['name'] + final)
            elif evidence_type == 'Predisposing':
                evidence_statements.append(
                    initial + ' Predisposition For Cancer ' + ' for patients with '
                    + record['disease']['name'] + final)

            # Counted by type even when no statement text was produced
            # (e.g. a Predictive record with an unhandled interaction type).
            slot = _EVIDENCE_TYPE_SLOTS.get(evidence_type)
            if slot is not None:
                sample_evidence_count[sample][slot] += 1

    return evidence_statements, sample_evidence_count


# In[102]:


def _dig(mapping, *keys):
    """Follow keys through nested dicts; return None as soon as a level is missing or not a dict."""
    for key in keys:
        if not isinstance(mapping, dict) or key not in mapping:
            return None
        mapping = mapping[key]
    return mapping


# Walk every input variant, look it up through `mv`, and keep those that carry
# a 'civic' annotation.
#   processed      -- number of rows visited
#   clinical_count -- number of rows whose lookup result contains 'civic'
#   information    -- {(gene, variant type, ENSG, ENST, protein change, pop. freq): {}}
processed = 0
clinical_count = 0
information = {}

for i, row in somatic_variants.iterrows():
    processed += 1

    chrom = row['Chromosome']
    start = int(row['Start'])
    ref = row['Ref']
    var = row['Var']

    variant = myvariant.format_hgvs(chrom, start, ref, var)
    directory = mv.getvariant(variant)

    # Nothing returned, or no CIViC record for this variant: nothing to report.
    if not directory or 'civic' not in directory:
        continue

    # Placeholder for per-variant clinical details; it is stored but never filled in here.
    clinical_correlates = {}
    clinical_count += 1

    gene = directory['civic']['entrez_name']
    variant_type = directory['civic']['variant_types']['display_name']
    protein_change = directory['civic']['name']

    # Only 'civic' is known to be present; the other annotation sources may be
    # missing for a given variant, so fall back to None instead of raising KeyError.
    ENST = _dig(directory, 'cadd', 'gene', 'feature_id')
    ENSG = _dig(directory, 'cadd', 'gene', 'gene_id')
    pop_freq = _dig(directory, 'gnomad_exome', 'af', 'af')

    information[gene, variant_type, ENSG, ENST, protein_change, pop_freq] = clinical_correlates
            
            
# In[103]:


# Bare expression: in a notebook this displays the `information` dict as the
# cell output; it has no effect when the file is run as a plain script.
information


# In[49]:


# def build_document(sample_name, input_list, CIViC_annotations):

# Build the Word report: header image, title, run summary, one header-only
# table per entry in `information`, annotation rows, and a disclaimer.
#
# NOTE(review): `input_list` and `CIViC_annotation` are not assigned anywhere
# in this file. Presumably they come from the commented-out `build_document`
# signature or from the script run earlier in the notebook -- confirm.
currentDT = datetime.datetime.now()

document = Document()
document.add_picture('Extra/report_header.png', width=Inches(6))
document.add_heading('SOMATIC VARIANT ANNOTATION', 0)

# Summary paragraph: bold label followed by its value, one per line.
p = document.add_paragraph()
for label, value in (
    ('Sample Name: ', str(sample_name)),
    ('Date ', currentDT.strftime("%a, %b %d, %Y")),
    ('Time Processed: ', currentDT.strftime("%I:%M:%S %p")),
    ('Variants Processed: ', str(len(input_list))),
    # Previously repeated len(input_list); the count of CIViC-annotated
    # variants is tracked in `clinical_count`.
    ('Clinical Annotations: ', str(clinical_count)),
):
    p.add_run(label).bold = True
    p.add_run(value + '\n')

document.add_heading('Direct CIViC Annotations' + '\n', level=1)

for item in information:
    # One table per entry. The duplicated add_table call that left a stray
    # empty table in the document for every entry has been removed.
    table = document.add_table(rows=1, cols=5, style='Table Grid')
    table.allow_autofit = True
    hdr_cells = table.rows[0].cells
    for cell, title in zip(hdr_cells, ('Gene', 'Variant', 'Description', 'CIViC EID', 'PubMedID')):
        cell.paragraphs[0].add_run(title).bold = True

# NOTE(review): this loop sits outside the per-entry loop above, so every row
# is appended to the last table created, and `table` is undefined when
# `information` is empty. Confirm whether a single table was intended.
for i, row in CIViC_annotation.iterrows():
    row_cells = table.add_row().cells
    row_cells[0].text = row['Gene']
    row_cells[1].text = row['Variant']
    row_cells[2].text = row['Description']
    row_cells[3].text = row['CIViC Link']
    row_cells[4].text = row['PubMedID Link']


p = document.add_paragraph('\n' + 'OpenCAP is intended for research use only and clinical applications of subsequent panels designed using the SOP would require further panel validation.')

document.save('demo.docx')


# In[ ]: