from google.cloud import bigquery from google.colab import auth import pandas as pd import seaborn as sns import pandas_gbq auth.authenticate_user() my_project_id = "" # write your project id here bqclient = bigquery.Client( my_project_id ) gene_name = 'RAB5A' protein_table = 'isb-cgc-bq.CPTAC_versioned.quant_proteome_CPTAC_CCRCC_discovery_study_pdc_V1_21' gexp_table = 'isb-cgc-bq.CPTAC_versioned.RNAseq_hg38_gdc_r28' prot = '''quant AS ( SELECT meta.sample_submitter_id, meta.sample_type, quant.case_id, quant.aliquot_id, quant.gene_symbol, CAST(quant.protein_abundance_log2ratio AS FLOAT64) AS protein_abundance_log2ratio FROM `{0}` as quant JOIN `isb-cgc-bq.PDC_metadata_versioned.aliquot_to_case_mapping_V1_21` as meta ON quant.case_id = meta.case_id AND quant.aliquot_id = meta.aliquot_id AND quant.gene_symbol = '{1}' ) '''.format(protein_table,gene_name) gexp = '''gexp AS ( SELECT DISTINCT meta.sample_submitter_id, meta.sample_type, rnaseq.gene_name , LOG(rnaseq.HTSeq__FPKM + 1) as HTSeq__FPKM FROM `{0}` as rnaseq JOIN `isb-cgc-bq.PDC_metadata.aliquot_to_case_mapping_current` as meta ON meta.sample_submitter_id = rnaseq.sample_barcode AND rnaseq.gene_name = '{1}' ) '''.format(gexp_table, gene_name) mysql = ( 'WITH ' + prot + ',' + gexp + ''' SELECT quant.sample_submitter_id, quant.sample_type, quant.gene_symbol, quant.protein_abundance_log2ratio, gexp.HTSeq__FPKM FROM quant JOIN gexp ON gexp.sample_submitter_id = quant.sample_submitter_id ''' ) df2 = pandas_gbq.read_gbq(mysql,project_id=my_project_id ) df2[0:10] grid = sns.FacetGrid(df2, col = "sample_type", hue = "sample_type", col_wrap=2) grid.map(sns.scatterplot, "HTSeq__FPKM", "protein_abundance_log2ratio")