Intersect bed on gene-centric features

In [3]:
# Print the current system date/time so the run is timestamped in the output.
!date
Fri Mar 13 08:02:12 PDT 2015
In [33]:
%pylab inline
import scipy.stats as stats
Populating the interactive namespace from numpy and matplotlib

Feature (from nb -03)

TL;DR: 4 "new" tracks (screenshot: IGV_and_Directory_Listing_of__halfshell_2015-02-hs-bedgraph__1AA51F1B.png)

/Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf
/Users/sr320/data-genomic/tentacle/rebuilt.gtf
/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff
/Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff

DEGs (differentially expressed genes)

`-wb`: Write the original entry in B for each overlap. Useful for knowing what A overlaps. Restricted by `-f` and `-r`. (Quoted from the bedtools intersect option help.)

In [6]:
# Intersect the 2M_sig bedGraph track (-a) with the Cuffdiff significant-gene
# GTF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \
| cut -f 6 \
| sort | uniq -c 
 880 Cufflinks
In [7]:
# Intersect the 4M_sig bedGraph track (-a) with the Cuffdiff significant-gene
# GTF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \
| cut -f 6 \
| sort | uniq -c 
 704 Cufflinks
In [8]:
# Intersect the 6M_sig bedGraph track (-a) with the Cuffdiff significant-gene
# GTF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \
| cut -f 6 \
| sort | uniq -c 
 632 Cufflinks
In [10]:
# Background count: intersect the array-design probe-location GFF (-a) with the
# Cuffdiff significant-gene GTF (-b) and tally output field 11.
# NOTE(review): field 11 (rather than 6) is presumably used because a GFF A-entry
# has 9 columns, putting B's 2nd column at position 11 -- confirm.
!intersectbed \
-wb \
-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \
-b /Users/sr320/data-genomic/tentacle/Cuffdiff_geneexp.sig.gtf \
| cut -f 11 \
| sort | uniq -c 
117460 Cufflinks
In [34]:
# Observed counts: Oyster 2 in the first row, array probes in the second row.
# The first column (880, 117460) matches the intersectbed counts printed for the
# 2M track and the probe GFF against the Cuffdiff significant-gene GTF.
# NOTE(review): the second-column values (10028, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [880, 10028],
    [117460, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 352.138, with p=0.000
The uncorrected chi2 value is 352.654, with p=0.000
In [37]:
# Observed counts: Oyster 4 in the first row, array probes in the second row.
# The first column (704, 117460) matches the intersectbed counts printed for the
# 4M track and the probe GFF against the Cuffdiff significant-gene GTF.
# NOTE(review): the second-column values (10148, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [704, 10148],
    [117460, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 547.532, with p=0.000
The uncorrected chi2 value is 548.178, with p=0.000
In [38]:
# Observed counts: Oyster 6 in the first row, array probes in the second row.
# The first column (632, 117460) matches the intersectbed counts printed for the
# 6M track and the probe GFF against the Cuffdiff significant-gene GTF.
# NOTE(review): the second-column values (11690, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [632, 11690],
    [117460, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 853.613, with p=0.000
The uncorrected chi2 value is 854.371, with p=0.000

Rebuilt (new GTF based on RNA-seq data)

In [15]:
# Intersect the 2M_sig bedGraph track (-a) with the rebuilt GTF (-b); -wb appends
# the matching B entry to each overlap line. Output field 6 is then tallied, so
# the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \
| cut -f 6 \
| sort | uniq -c 
8768 Cufflinks
In [16]:
# Intersect the 4M_sig bedGraph track (-a) with the rebuilt GTF (-b); -wb appends
# the matching B entry to each overlap line. Output field 6 is then tallied, so
# the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \
| cut -f 6 \
| sort | uniq -c 
7694 Cufflinks
In [17]:
# Intersect the 6M_sig bedGraph track (-a) with the rebuilt GTF (-b); -wb appends
# the matching B entry to each overlap line. Output field 6 is then tallied, so
# the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GTF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \
| cut -f 6 \
| sort | uniq -c 
6160 Cufflinks
In [18]:
# Background count: intersect the array-design probe-location GFF (-a) with the
# rebuilt GTF (-b) and tally output field 11.
# NOTE(review): field 11 (rather than 6) is presumably used because a GFF A-entry
# has 9 columns, putting B's 2nd column at position 11 -- confirm.
!intersectbed \
-wb \
-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \
-b /Users/sr320/data-genomic/tentacle/rebuilt.gtf \
| cut -f 11 \
| sort | uniq -c 
1197818 Cufflinks
In [39]:
# Observed counts: Oyster 2 in the first row, array probes in the second row.
# The first column (8768, 1197818) matches the intersectbed counts printed for
# the 2M track and the probe GFF against the rebuilt GTF.
# NOTE(review): the second-column values (10028, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [8768, 10028],
    [1197818, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 2184.818, with p=0.000
The uncorrected chi2 value is 2185.528, with p=0.000
In [40]:
# Observed counts: Oyster 4 in the first row, array probes in the second row.
# The first column (7694, 1197818) matches the intersectbed counts printed for
# the 4M track and the probe GFF against the rebuilt GTF.
# NOTE(review): the second-column values (10148, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [7694, 10148],
    [1197818, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 3052.863, with p=0.000
The uncorrected chi2 value is 3053.724, with p=0.000
In [41]:
# Observed counts: Oyster 6 in the first row, array probes in the second row.
# The first column (6160, 1197818) matches the intersectbed counts printed for
# the 6M track and the probe GFF against the rebuilt GTF.
# NOTE(review): the second-column values (11690, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [6160, 11690],
    [1197818, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 6233.645, with p=0.000
The uncorrected chi2 value is 6234.874, with p=0.000

Housekeeping Genes

In [23]:
# Intersect the 2M_sig bedGraph track (-a) with the housekeeping-gene GFF (-b);
# -wb appends the matching B entry to each overlap line. Output field 6 is then
# tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \
| cut -f 6 \
| sort | uniq -c 
3210 GLEAN
In [24]:
# Intersect the 4M_sig bedGraph track (-a) with the housekeeping-gene GFF (-b);
# -wb appends the matching B entry to each overlap line. Output field 6 is then
# tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \
| cut -f 6 \
| sort | uniq -c 
3369 GLEAN
In [25]:
# Intersect the 6M_sig bedGraph track (-a) with the housekeeping-gene GFF (-b);
# -wb appends the matching B entry to each overlap line. Output field 6 is then
# tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \
| cut -f 6 \
| sort | uniq -c 
3819 GLEAN
In [26]:
# Background count: intersect the array-design probe-location GFF (-a) with the
# housekeeping-gene GFF (-b) and tally output field 11.
# NOTE(review): field 11 (rather than 6) is presumably used because a GFF A-entry
# has 9 columns, putting B's 2nd column at position 11 -- confirm.
!intersectbed \
-wb \
-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-housekeeping.gff \
| cut -f 11 \
| sort | uniq -c 
251970 GLEAN
In [42]:
# Observed counts: Oyster 2 in the first row, array probes in the second row.
# The first column (3210, 251970) matches the intersectbed counts printed for
# the 2M track and the probe GFF against the housekeeping-gene GFF.
# NOTE(review): the second-column values (10028, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [3210, 10028],
    [251970, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 34.806, with p=0.000
The uncorrected chi2 value is 34.923, with p=0.000
In [43]:
# Observed counts: Oyster 4 in the first row, array probes in the second row.
# The first column (3369, 251970) matches the intersectbed counts printed for
# the 4M track and the probe GFF against the housekeeping-gene GFF.
# NOTE(review): the second-column values (10148, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [3369, 10148],
    [251970, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 17.578, with p=0.000
The uncorrected chi2 value is 17.661, with p=0.000
In [47]:
# Observed counts: Oyster 6 in the first row, array probes in the second row.
# The first column (3819, 251970) matches the intersectbed counts printed for
# the 6M track and the probe GFF against the housekeeping-gene GFF.
# NOTE(review): the second-column values (11690, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [3819, 11690],
    [251970, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 28.378, with p=0.000
The uncorrected chi2 value is 28.476, with p=0.000
In [ ]:
 
In [ ]:
 

Environmental Response Genes

In [29]:
# Intersect the 2M_sig bedGraph track (-a) with the environmental-response gene
# GFF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.2M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \
| cut -f 6 \
| sort | uniq -c 
2809 GLEAN
In [30]:
# Intersect the 4M_sig bedGraph track (-a) with the environmental-response gene
# GFF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.4M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \
| cut -f 6 \
| sort | uniq -c 
2738 GLEAN
In [31]:
# Intersect the 6M_sig bedGraph track (-a) with the environmental-response gene
# GFF (-b); -wb appends the matching B entry to each overlap line. Output field 6
# is then tallied, so the result is one count per distinct value of that field.
# NOTE(review): field 6 is presumably the GFF source column (B's 2nd column after
# a 4-column bedGraph entry) -- confirm against the input files.
!intersectbed \
-wb \
-a ./data/2014.07.02.colson/genomeBrowserTracks/logFC_HS-preHS/2014.07.02.6M_sig.bedGraph \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \
| cut -f 6 \
| sort | uniq -c 
3216 GLEAN
In [27]:
# Background count: intersect the array-design probe-location GFF (-a) with the
# environmental-response gene GFF (-b) and tally output field 11.
# NOTE(review): field 11 (rather than 6) is presumably used because a GFF A-entry
# has 9 columns, putting B's 2nd column at position 11 -- confirm.
!intersectbed \
-wb \
-a /Users/sr320/git-repos/paper-Temp-stress/ipynb/data/array-design/OID40453_probe_locations.gff \
-b /Users/sr320/data-genomic/tentacle/Cgigas_v9_gene-env-response.gff \
| cut -f 11 \
| sort | uniq -c 
190475 GLEAN
In [45]:
# Observed counts: Oyster 2 in the first row, array probes in the second row.
# The first column (2809, 190475) matches the intersectbed counts printed for
# the 2M track and the probe GFF against the environmental-response gene GFF.
# NOTE(review): the second-column values (10028, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [2809, 10028],
    [190475, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 1.413, with p=0.235
The uncorrected chi2 value is 1.439, with p=0.230
In [48]:
# Observed counts: Oyster 4 in the first row, array probes in the second row.
# The first column (2738, 190475) matches the intersectbed counts printed for
# the 4M track and the probe GFF against the environmental-response gene GFF.
# NOTE(review): the second-column values (10148, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [2738, 10148],
    [190475, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 0.280, with p=0.597
The uncorrected chi2 value is 0.291, with p=0.589
In [49]:
# Observed counts: Oyster 6 in the first row, array probes in the second row.
# The first column (3216, 190475) matches the intersectbed counts printed for
# the 6M track and the probe GFF against the environmental-response gene GFF.
# NOTE(review): the second-column values (11690, 697753) are not derived in this
# notebook -- confirm their source and that the four cells are mutually exclusive.
obs = array([
    [3216, 11690],
    [190475, 697753],
])

# Chi-square test of independence, without and with Yates' continuity correction
chi2_uncorrected = stats.chi2_contingency(obs, correction=False)
chi2_corrected = stats.chi2_contingency(obs, correction=True)

# Report the statistic (index 0) and the p-value (index 1) of each result
print('\n'.join([
    'CHI SQUARE',
    'The corrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_corrected[:2]),
    'The uncorrected chi2 value is {0:5.3f}, with p={1:5.3f}'.format(*chi2_uncorrected[:2]),
]))
CHI SQUARE
The corrected chi2 value is 0.141, with p=0.707
The uncorrected chi2 value is 0.149, with p=0.700
In [ ]: