This notebook illustrates some of the steps involved in working with annotations from the IDR. It builds a figure similar to Figure 1b of the paper by Rohn et al., "Comparative RNAi screening identifies a conserved core metazoan actinome by phenotype" (IDR008).
import os
import sys
import random
import omero
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.cluster.hierarchy as hchy
from seaborn import clustermap
from pandas import Series
from pandas import DataFrame
from pandas import read_csv
from pandas import merge,concat
from pandas import read_hdf,HDFStore
from idr import connection
%matplotlib inline
# Use the gray colormap as matplotlib's default for images.
plt.rcParams.update({'image.cmap': 'gray'})

# Id of the screen whose annotations are analysed in this notebook.
screenId = 206
def buildComposite(st, n, m, smpl=None):
    """
    Tile frames of an image stack into an n x m grid, returned as one image.

    Parameters:
        st: array indexed as (row, column, frame).
        n: number of grid rows in the composite.
        m: number of grid columns in the composite.
        smpl: step between the frames picked from st. When None it is
            st.shape[2] // (n*m), so the picked frames span the stack.

    Returns:
        A 2-D float array of shape (st.shape[0]*n, st.shape[1]*m). Grid
        cell (i, j) holds frame (i*m + j)*smpl; cells whose frame index
        falls outside the stack are left at zero.
    """
    nr = st.shape[0]
    nc = st.shape[1]
    if smpl is None:
        # Floor division keeps the step an integer on Python 2 and 3 alike;
        # a float step makes every frame lookup fail.
        smpl = st.shape[2] // (n * m)
    res = np.zeros((nr * n, nc * m))
    for i in range(n):
        for j in range(m):
            try:
                frame = st[:, :, (i * m + j) * smpl]
            except IndexError:
                # Ran past the end of the stack: leave the rest blank.
                break
            res[i * nr:(i + 1) * nr, j * nc:(j + 1) * nc] = frame
    return res
def getRohnTile(imid, x, y, w, h, conn, chan=0):
    """
    Read one rectangular region from one image.

    The image is looked up by its id (imid) through conn, and the region
    (x, y, w, h) of channel chan is requested from its primary pixels.
    No Z or T index is passed, so getTile's own defaults apply.
    Returns whatever getTile returns for that region.
    """
    image = conn.getObject("Image", imid)
    region = (x, y, w, h)
    return image.getPrimaryPixels().getTile(theC=chan, tile=region)
def getBulkAnnotationAsDf(screenID, conn):
    """
    Load the 'bulk_annotations' table attached to a screen as a DataFrame.

    Parameters:
        screenID: id of the Screen object to look up through conn.
        conn: open gateway connection used for all server calls.

    Returns:
        A DataFrame with one column per table column (named after the
        table headers) and one row per table row, or None when the screen
        has no 'bulk_annotations' file annotation or when that file is
        larger than the size limit below.
    """
    # Refuse to download very large tables (the limit is roughly 1.4 GB).
    max_size = 1476250900

    ofId = None
    sc = conn.getObject('Screen', screenID)
    for ann in sc.listAnnotations():
        if not isinstance(ann, omero.gateway.FileAnnotationWrapper):
            continue
        annFile = ann.getFile()
        if annFile.getName() != 'bulk_annotations':
            continue
        if annFile.getSize() > max_size:
            # Parenthesised form prints the same text on Python 2 and 3.
            print("that's a big file...")
            return None
        ofId = annFile.getId()
        break

    if ofId is None:
        return None

    original_file = omero.model.OriginalFileI(ofId, False)
    table = conn.c.sf.sharedResources().openTable(original_file)
    try:
        column_names = [col.name for col in table.getHeaders()]
        # Request every column by position, so duplicate header names
        # cannot collapse onto the same index.
        column_indices = list(range(len(column_names)))
        table_data = table.slice(column_indices, None)
    finally:
        # Always release the server-side table handle.
        table.close()

    # The slice is column-oriented; transpose it into row tuples.
    rows = list(zip(*[column.values for column in table_data.columns]))
    # Passing the names to the constructor also works for an empty table.
    return DataFrame(rows, columns=column_names)
# Open the connection used by every server call in this notebook.
conn = connection()

# Download the annotation table of the whole screen
# as a pandas DataFrame.
dfRhonAnn = getBulkAnnotationAsDf(screenId, conn)
if dfRhonAnn is None:
    # None means the table was not found or was too large to download.
    # Stop here with a clear message instead of failing later on a
    # None lookup.
    raise RuntimeError(
        "no usable 'bulk_annotations' table for screen %d" % screenId)
Connected to IDR...
# For each of the 40 'Phenotype N' columns, add a boolean companion column
# telling whether the well is annotated with it, and record its label.
BoolCols = []
PhenLab = []
for iphen in range(1, 41):
    col = 'Phenotype ' + str(iphen)
    boolCol = 'Bool' + col
    # An empty string means the well does not carry this phenotype.
    dfRhonAnn[boolCol] = dfRhonAnn[col] != ''
    BoolCols.append(boolCol)
    # Use the first non-empty value of the column as its label. Do not
    # rely on '' being the first unique value; fall back to the column
    # name when the column is entirely empty.
    labels = [val for val in dfRhonAnn[col].unique() if val != '']
    PhenLab.append(labels[0] if labels else col)

# count a gene as having a phenotype
# if at least one well annotated with
# it has a phenotype
phenMap = dfRhonAnn[dfRhonAnn['Has Phenotype'] == 'yes'].groupby('Gene Symbol')[BoolCols].sum() > 0
# Drop genes that end up with no phenotype at all.
phenMap = phenMap[phenMap.sum(axis=1) > 0]
phenMap.columns = PhenLab
# Clustering needs numbers: turn False/True into 0/1.
phenMap = phenMap.astype(int)

# Ward linkage over genes (rows) and over phenotypes (columns).
Z = hchy.linkage(phenMap, 'ward')
Zt = hchy.linkage(phenMap.transpose(), 'ward')

# Silence all warnings from here on.
import warnings
warnings.filterwarnings('ignore')

cg = clustermap(phenMap, row_linkage=Z, col_linkage=Zt, figsize=(10, 10))
# Vertical phenotype labels, horizontal gene labels.
for item in cg.ax_heatmap.get_xticklabels():
    item.set_rotation(90)
for item in cg.ax_heatmap.get_yticklabels():
    item.set_rotation(0)
A. Gallery of tiles for a bunch of phenotypes
# Tile origins are hard coded; they correspond to the tile size used for
# the CHARM features computation.
# NOTE(review): Y offsets step by 87 while h is 86 - confirm this is intended.
X = [0, 580, 116, 348, 232, 464]
Y = [0, 348, 87, 174, 261]
ni = 15  # number of random tiles drawn per phenotype
w = 116
h = 86

# A handful of phenotypes; each one becomes a row of the gallery.
phs = range(20, 30)
tiles = np.zeros((h, w, len(phs) * ni))
for row, ph in enumerate(phs):
    phenCol = 'Phenotype ' + str(ph)
    # Wells annotated with this phenotype.
    ws = dfRhonAnn[dfRhonAnn[phenCol] != ''].Well
    for k in range(ni):
        # Random well, then a random tile origin inside its image.
        well = conn.getObject('Well', random.choice(ws.values))
        image = well.getImage()
        x = random.choice(X)
        y = random.choice(Y)
        tiles[:, :, row * ni + k] = getRohnTile(image.getId(), x, y, w, h, conn, chan=1)

# Lay the tiles out on a grid and show it.
plt.figure(figsize=(15, 15))
imc = buildComposite(tiles, len(phs), ni)
plt.grid(False)
plt.imshow(imc)
<matplotlib.image.AxesImage at 0x7f5331e14c50>
# Close the connection that was opened earlier with connection().
conn.close()
Copyright (C) 2016 University of Dundee. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.