#!/usr/bin/env python
# coding: utf-8

# # Annotate Putative Somatic Variants with CIViC and VEP

# Welcome back to OpenCAP Jupyter Notebooks! Here you can run pre-designed
# code to generate an output report for your variants.
#
# To run a Jupyter notebook cell, hold "shift" and press "enter".

# ## Commands to annotate variants

# We have created a Python script called identified_variants_to_annotation.py
# that takes in variants from the sequencing pipeline and outputs a document
# with annotation information. The output includes variant effect predictions
# and clinical interpretations of variants in cancer.
#
# Before running this cell, please upload your somatic variants to the home
# directory (see ReadTheDocs) and change the input variant list name to match
# your somatic variant list. You can run the cell by holding the "shift" key
# and pressing "enter".

# In[ ]:


# Run the annotation script on the input variant list.
# NOTE: get_ipython() only exists inside IPython/Jupyter; `-i` runs the script
# in the interactive namespace, so names it defines become visible here.
get_ipython().run_line_magic('run', "-i 'identified_variants_to_annotation.py' 'test_annotate_variants.tsv' 'Sample_name'")


# In[6]:


# In[50]:


from docx import Document
from docx.shared import Inches
import pandas as pd
import datetime
import solvebio
import myvariant
import utils

# MyVariant.info client. The cells below call `mv.getvariant(...)`, but this
# notebook never created `mv`, so it is instantiated explicitly here.
mv = myvariant.MyVariantInfo()


# In[64]:


# Pull in variant file
somatic_variants = pd.read_csv('test_annotate_variants.tsv', sep='\t')
sample_name = 'SCLC_5'


# In[83]:


# Exploratory lookup: shows the CIViC HGVS expressions of one known variant
# when run as a notebook cell (the value is discarded when run as a script).
mv.getvariant('chr7:g.140453136A>T')['civic']['hgvs_expressions']


# In[ ]:


def _join_drug_names(drugs, conjunction):
    """Render drug names as 'a, b<conjunction>c' (e.g. 'a, b and c')."""
    names = [drug['name'] for drug in drugs]
    return ', '.join(names[:-1]) + conjunction + str(names[-1])


def get_evidence_statements(variant_ids, sample):
    """Build human-readable CIViC evidence statements and per-type counts.

    Evidence items whose ``evidence_level`` is 'D' or 'E' are skipped. For
    every other item a sentence is produced for the evidence types
    'Predictive', 'Prognostic', 'Diagnostic' and 'Predisposing', and the
    matching counter for ``sample`` is incremented.

    NOTE(review): the function reads the module-level names
    ``current_variant``, ``gene`` and ``variant``; none of them is assigned
    here. ``gene`` and ``variant`` are assigned by the annotation loop further
    down, while ``current_variant`` is not assigned anywhere in this file
    (presumably it comes from the ``%run -i`` script namespace - TODO
    confirm). The elements of ``variant_ids`` themselves are not read; the
    argument only controls how many passes are made.

    NOTE(review): the nesting of the counters under the evidence-level filter
    was reconstructed from a source whose line breaks were lost - confirm.

    Args:
        variant_ids: Iterable; one pass over
            ``current_variant['evidence_items']`` is made per element.
        sample: Key under which the evidence counts are stored.

    Returns:
        A tuple ``(evidence_statements, sample_evidence_count)`` where
        ``evidence_statements`` is a list of strings and
        ``sample_evidence_count`` maps ``sample`` to
        ``[predictive, prognostic, diagnostic, predisposing]`` counts.
    """
    # Position of each evidence type inside the per-sample count list.
    count_index = {
        'Predictive': 0,
        'Prognostic': 1,
        'Diagnostic': 2,
        'Predisposing': 3,
    }

    evidence_statements = []
    sample_evidence_count = {sample: [0, 0, 0, 0]}

    for item in variant_ids:
        for evidence in current_variant['evidence_items']:
            record = evidence[0]
            initial = (str(gene) + ' ' + str(variant) + ' '
                       + record['evidence_direction'] + ' '
                       + record['clinical_significance'])
            final = ('(CIViC ' + record['name'] + '- PMID:'
                     + record['source']['pubmed_id'] + ')')

            # Skip the two lowest evidence levels.
            if record['evidence_level'] in ('D', 'E'):
                continue

            evidence_type = record['evidence_type']

            if evidence_type == 'Predictive':
                interaction = record['drug_interaction_type']
                treatment = None
                if interaction == 'Combination':
                    treatment = ('combination of '
                                 + _join_drug_names(record['drugs'], ' and '))
                elif not interaction:
                    treatment = record['drugs'][0]['name']
                elif interaction == 'Substitutes':
                    treatment = _join_drug_names(record['drugs'], ' or ')
                elif interaction == 'Sequential':
                    treatment = ('sequence of '
                                 + _join_drug_names(record['drugs'], ' and '))
                # Any other interaction type yields no statement.
                if treatment is not None:
                    evidence_statements.append(
                        initial + ' to ' + treatment + ' for patients with '
                        + record['disease']['name'] + final)
            elif evidence_type in ('Prognostic', 'Diagnostic'):
                evidence_statements.append(
                    initial + ' for patients with '
                    + record['disease']['name'] + final)
            elif evidence_type == 'Predisposing':
                evidence_statements.append(
                    initial + ' Predisposition For Cancer '
                    + ' for patients with '
                    + record['disease']['name'] + final)

            if evidence_type in count_index:
                sample_evidence_count[sample][count_index[evidence_type]] += 1

    return evidence_statements, sample_evidence_count


# In[102]:


# Look up every input variant on MyVariant.info and keep those with a CIViC
# record. `information` is keyed by a tuple of descriptive fields.
processed = 0
clinical_count = 0
information = {}
for i, row in somatic_variants.iterrows():
    processed += 1
    chrom = row['Chromosome']
    start = int(row['Start'])
    ref = row['Ref']
    var = row['Var']
    variant = myvariant.format_hgvs(chrom, start, ref, var)
    directory = mv.getvariant(variant)
    if directory and 'civic' in directory:
        clinical_correlates = {}
        clinical_count += 1
        gene = directory['civic']['entrez_name']
        variant_type = directory['civic']['variant_types']['display_name']
        ENST = directory['cadd']['gene']['feature_id']
        ENSG = directory['cadd']['gene']['gene_id']
        protein_change = directory['civic']['name']
        pop_freq = directory['gnomad_exome']['af']['af']
        information[gene, variant_type, ENSG, ENST, protein_change, pop_freq] = clinical_correlates


# In[103]:


information


# In[49]:


# def build_document(sample_name, input_list, CIViC_annotations):
currentDT = datetime.datetime.now()

document = Document()
document.add_picture('Extra/report_header.png', width=Inches(6))
document.add_heading('SOMATIC VARIANT ANNOTATION', 0)

p = document.add_paragraph()
p.add_run('Sample Name: ').bold = True
p.add_run(str(sample_name) + '\n')
p.add_run('Date ').bold = True
p.add_run(currentDT.strftime("%a, %b %d, %Y") + '\n')
p.add_run('Time Processed: ').bold = True
p.add_run(currentDT.strftime("%I:%M:%S %p") + '\n')
# Both counts previously printed len(input_list), a name not defined in this
# notebook; the counters computed by the annotation loop are reported instead.
p.add_run('Variants Processed: ').bold = True
p.add_run(str(processed) + '\n')
p.add_run('Clinical Annotations: ').bold = True
p.add_run(str(clinical_count) + '\n')

document.add_heading('Direct CIViC Annotations' + '\n', level=1)

# NOTE(review): loop nesting reconstructed from a source whose line breaks
# were lost; one table is emitted per entry of `information` - confirm.
for item in information:
    table = document.add_table(rows=1, cols=5, style='Table Grid')
    table.allow_autofit = True
    hdr_cells = table.rows[0].cells
    headers = ('Gene', 'Variant', 'Description', 'CIViC EID', 'PubMedID')
    for hdr_cell, title in zip(hdr_cells, headers):
        run = hdr_cell.paragraphs[0].add_run(title)
        run.bold = True

    # NOTE(review): `CIViC_annotation` is not defined in this file; presumably
    # it is provided by the `%run -i` script namespace - TODO confirm.
    for i, row in CIViC_annotation.iterrows():
        row_cells = table.add_row().cells
        row_cells[0].text = row['Gene']
        row_cells[1].text = row['Variant']
        row_cells[2].text = row['Description']
        row_cells[3].text = row['CIViC Link']
        row_cells[4].text = row['PubMedID Link']

p = document.add_paragraph('\n' + 'OpenCAP is intended for research use only and clinical applications of subsequent panels designed using the SOP would require further panel validation.')

document.save('demo.docx')


# In[ ]: