#!/usr/bin/env python
# coding: utf-8
# # Defining RNA-seq (gene function) based Tracks
# **tl;dr**: this notebook defines 4 "new" tracks:
#
# ```
# /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf
# /Users/sr320/data-genomic/tentacle/rebuilt.gtf
# /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff
# /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff
# ```
# ### Diff Exp Genes
# In[96]:
# Track of differentially expressed genes (DEGs) as defined by Cuffdiff.
#how derived = {RNA-seq-Gene-ID}
# Print the last three lines of the DEG track file.
get_ipython().system(
    'tail -3 '
    '/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf'
)
# In[9]:
# Report the number of lines in the DEG track file.
get_ipython().system(
    'wc -l '
    '/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf'
)
# ### New GTF from Cuffdiff
# In[6]:
# GTF produced from Cuffdiff.
# see /Volumes/web/halfshell/BS-heat/Cuffdiff2_heat-b-2014-12-20-22-27-15.4
# Print the first three lines of the rebuilt GTF.
get_ipython().system(
    'head -3 '
    '/Users/sr320/data-genomic/tentacle/rebuilt.gtf'
)
# In[7]:
# Report the number of lines in the rebuilt GTF.
get_ipython().system(
    'wc -l '
    '/Users/sr320/data-genomic/tentacle/rebuilt.gtf'
)
# ### GigaDB gene tracks - Isolated Housekeeping and Environment Stress Genes
#
# _Based on annotation from 10.3389/fphys.2011.00116 (see image above)_
# In[12]:
# Print the first three lines of the Cgigas v9 gene GFF.
get_ipython().system(
    'head -3 '
    '/Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff'
)
# In[13]:
# Report the number of lines in the Cgigas v9 gene GFF.
get_ipython().system(
    'wc -l '
    '/Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff'
)
# In[30]:
#adding extra CGI column to join GO info on
# Pipeline: awk prints field 9 of the gene GFF (tab as the field separator);
# `rev | cut -c 2- | rev` drops the last character of each line; sed rewrites
# every "ID=C" to "C". The result is written to Cgigas_v9_cgi.
# NOTE(review): the dropped character is presumably a trailing ";" -- confirm
# against the GFF attribute column.
get_ipython().system('awk -F["\\t"] \'{print $9}\' /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff | rev | cut -c 2- | rev | sed s/ID=C/C/g > /Users/sr320/data-genomic/tentacle/Cgigas_v9_cgi')
# Preview the first lines of the extracted ID file.
get_ipython().system('head /Users/sr320/data-genomic/tentacle/Cgigas_v9_cgi')
# In[32]:
# Line count of the ID file.
# NOTE(review): presumably compared by eye with the gene GFF line count,
# since paste below pairs the two files line by line -- confirm they match.
get_ipython().system('wc -l /Users/sr320/data-genomic/tentacle/Cgigas_v9_cgi')
# In[33]:
# Append the extracted IDs to the gene GFF as an extra tab-separated column
# and write the combined table to Cgigas_v9_gene--ID.tab.
get_ipython().system('paste /Volumes/web-1/trilobite/Crassostrea_gigas_v9_tracks/Cgigas_v9_gene.gff /Users/sr320/data-genomic/tentacle/Cgigas_v9_cgi > /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID.tab')
# In[34]:
# Preview the first lines of the combined table.
get_ipython().system('head /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID.tab')
# In[35]:
# Directory prefix of the sqlshare-pythonclient command-line tools; the shell
# commands below reference it as {sqls}, which IPython fills in from this variable.
sqls="/Applications/bioinfo/sqlshare-pythonclient/tools/"
# In[36]:
# Run singleupload.py on the combined gene/ID table, passing
# "Cgigas_v9_gene--ID" as the -d argument (presumably the dataset name -- confirm).
get_ipython().system('python {sqls}singleupload.py -d Cgigas_v9_gene--ID /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID.tab')
# In[44]:
# Run fetchdata.py with a SQL query (-s) that left-joins the uploaded table (md)
# to qDOD_Cgigas_GOslim_DISTINCT (go) on md.Column10 = go.CGI_ID, using -f tsv
# and writing to the -o path Cgigas_v9_gene--ID--GOslim.tab.
# NOTE(review): Column10 is presumably the ID column appended by paste -- confirm.
get_ipython().system('python {sqls}fetchdata.py -s "SELECT * FROM [sr320@washington.edu].[Cgigas_v9_gene--ID]md left join [sr320@washington.edu].[qDOD_Cgigas_GOslim_DISTINCT]go on md.Column10=go.CGI_ID" -f tsv -o /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab')
# In[45]:
# Preview the first lines of the joined table.
get_ipython().system('head /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab')
# In[47]:
# Preview the last three lines of the joined table.
get_ipython().system('tail -3 /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab')
# In[79]:
# Preview joined-table lines that mention DNA, RNA or protein metabolism
# (the pattern is a grep alternation of the three phrases).
get_ipython().system("grep 'DNA metabolism\\|RNA metabolism\\|protein metabolism' /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab | head")
# In[80]:
# Preview lines that mention cell-cell signaling, signal transduction or cell
# adhesion, then drop lines containing "signal transduction activity" followed
# by a tab and "F" (the \t escape in this Python string reaches grep as a
# literal tab character).
# NOTE(review): the trailing "F" presumably marks the molecular-function GO
# aspect -- confirm against the GOslim table.
get_ipython().run_cell_magic('bash', '', 'grep --color \'cell-cell signaling\\|signal transduction\\|cell adhesion\' \\\n/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab \\\n| grep -v "signal transduction activity\tF" \\\n| head\n')
# In[87]:
#QC
# Count how often each distinct value of field 12 occurs among the metabolism matches.
# NOTE(review): field 12 is presumably the GOslim term column -- confirm.
get_ipython().system("grep 'DNA metabolism\\|RNA metabolism\\|protein metabolism' /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab | cut -f 12 | sort | uniq -c")
# In[88]:
#QC
# Same per-value count of field 12 for the signaling/adhesion matches, after the
# "signal transduction activity<TAB>F" exclusion.
get_ipython().system('grep \'cell-cell signaling\\|signal transduction\\|cell adhesion\' /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab | grep -v "signal transduction activity\tF" | cut -f 12 | sort | uniq -c')
# In[94]:
# Write the housekeeping track: fields 1-9 of every joined-table line matching
# DNA, RNA or protein metabolism go to Cgigas_v9_gene-housekeeping.gff.
# NOTE(review): if a gene has more than one matching row in the joined table,
# its line is written once per matching row -- confirm whether de-duplication
# is wanted.
get_ipython().system("grep 'DNA metabolism\\|RNA metabolism\\|protein metabolism' /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab | cut -f 1,2,3,4,5,6,7,8,9 > /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff")
# In[95]:
# Write the environment-response track: fields 1-9 of every line matching
# cell-cell signaling, signal transduction or cell adhesion, excluding lines
# with "signal transduction activity" followed by a tab and "F", go to
# Cgigas_v9_gene-env-response.gff.
# NOTE(review): the same possible one-line-per-matching-row duplication applies here.
get_ipython().system('grep \'cell-cell signaling\\|signal transduction\\|cell adhesion\' /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene--ID--GOslim.tab | grep -v "signal transduction activity\tF" | cut -f 1,2,3,4,5,6,7,8,9 > /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff')
# In[ ]: